Home
last modified time | relevance | path

Searched refs:vacc0123p0 (Results 1 – 25 of 35) sorted by relevance

12

/external/XNNPACK/src/f32-dwconv/gen/
Dup4x25-psimd.c88 psimd_f32 vacc0123p0 = psimd_load_f32(w); in xnn_f32_dwconv_ukernel_up4x25__psimd() local
95 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd()
101 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd()
107 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd()
113 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd()
119 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd()
125 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd()
131 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd()
137 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd()
143 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd()
[all …]
Dup4x25-sse.c88 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x25__sse() local
95 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse()
101 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse()
107 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse()
113 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse()
119 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse()
125 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse()
131 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse()
137 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse()
143 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse()
[all …]
Dup8x25-psimd.c88 psimd_f32 vacc0123p0 = psimd_load_f32(w); in xnn_f32_dwconv_ukernel_up8x25__psimd() local
98 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd()
107 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd()
116 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd()
125 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd()
134 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd()
143 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd()
152 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd()
161 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd()
170 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd()
[all …]
Dup8x25-sse.c88 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x25__sse() local
98 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
107 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
116 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
125 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
134 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
143 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
152 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
161 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
170 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
[all …]
Dup8x9-psimd.c56 psimd_f32 vacc0123p0 = psimd_load_f32(w); in xnn_f32_dwconv_ukernel_up8x9__psimd() local
66 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
75 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
84 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
93 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
102 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
111 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
120 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
129 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
138 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd()
[all …]
Dup8x9-neon.c56 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up8x9__neon() local
64 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
71 vacc0123p0 = vmlaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
78 vacc0123p0 = vmlaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
85 vacc0123p0 = vmlaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
92 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
99 vacc0123p0 = vmlaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
106 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
113 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
120 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neon()
[all …]
Dup8x9-sse.c56 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x9__sse() local
66 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
75 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
84 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
93 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
102 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
111 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
120 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
129 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
138 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse()
[all …]
Dup8x9-neonfma.c56 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up8x9__neonfma() local
64 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
71 vacc0123p0 = vfmaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
78 vacc0123p0 = vfmaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
85 vacc0123p0 = vfmaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
92 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
99 vacc0123p0 = vfmaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
106 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
113 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
120 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma()
[all …]
Dup4x9-neon.c56 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neon() local
61 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up4x9__neon()
65 vacc0123p0 = vmlaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x9__neon()
69 vacc0123p0 = vmlaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up4x9__neon()
73 vacc0123p0 = vmlaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up4x9__neon()
77 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__neon()
81 vacc0123p0 = vmlaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up4x9__neon()
85 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_ukernel_up4x9__neon()
89 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up4x9__neon()
93 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__neon()
[all …]
Dup4x9-neonfma.c56 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up4x9__neonfma() local
61 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma()
65 vacc0123p0 = vfmaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma()
69 vacc0123p0 = vfmaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma()
73 vacc0123p0 = vfmaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma()
77 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma()
81 vacc0123p0 = vfmaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma()
85 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma()
89 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma()
93 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__neonfma()
[all …]
Dup4x9-sse.c56 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x9__sse() local
63 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse()
69 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse()
75 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse()
81 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse()
87 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse()
93 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse()
99 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse()
105 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse()
111 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse()
[all …]
Dup4x9-psimd.c56 psimd_f32 vacc0123p0 = psimd_load_f32(w); in xnn_f32_dwconv_ukernel_up4x9__psimd() local
63 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd()
69 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd()
75 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd()
81 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd()
87 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd()
93 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd()
99 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd()
105 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd()
111 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd()
[all …]
Dup4x25-psimd-acc2.c88 psimd_f32 vacc0123p0 = psimd_load_f32(w); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2() local
95 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2()
107 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2()
119 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2()
131 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2()
143 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2()
155 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi10x0123, vk10x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2()
167 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi12x0123, vk12x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2()
179 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi14x0123, vk14x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2()
191 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_ukernel_up4x25__psimd_acc2()
[all …]
Dup4x25-sse-acc2.c88 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2() local
95 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
107 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
119 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
131 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
143 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
155 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi10x0123, vk10x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
167 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi12x0123, vk12x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
179 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi14x0123, vk14x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
191 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_ukernel_up4x25__sse_acc2()
[all …]
Dup8x4-psimd.c46 psimd_f32 vacc0123p0 = psimd_load_f32(w); in xnn_f32_dwconv_ukernel_up8x4__psimd() local
56 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd()
65 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd()
74 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd()
83 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd()
89 psimd_f32 vacc0123 = psimd_max_f32(vacc0123p0, vmin); in xnn_f32_dwconv_ukernel_up8x4__psimd()
99 psimd_f32 vacc0123p0 = psimd_load_f32(w); in xnn_f32_dwconv_ukernel_up8x4__psimd() local
105 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd()
111 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd()
117 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up8x4__psimd()
[all …]
Dup8x25-psimd-acc2.c88 psimd_f32 vacc0123p0 = psimd_load_f32(w); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2() local
98 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
116 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
134 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
152 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
170 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
188 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi10x0123, vk10x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
206 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi12x0123, vk12x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
224 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi14x0123, vk14x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
242 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_ukernel_up8x25__psimd_acc2()
[all …]
Dup8x4-sse.c46 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x4__sse() local
56 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse()
65 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse()
74 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse()
83 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse()
89 __m128 vacc0123 = _mm_max_ps(vacc0123p0, vmin); in xnn_f32_dwconv_ukernel_up8x4__sse()
99 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x4__sse() local
105 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse()
111 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse()
117 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up8x4__sse()
[all …]
Dup8x25-sse-acc2.c88 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() local
98 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
116 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
134 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
152 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
170 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
188 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi10x0123, vk10x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
206 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi12x0123, vk12x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
224 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi14x0123, vk14x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
242 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
[all …]
Dup8x9-neon-acc2.c56 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() local
64 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
78 vacc0123p0 = vmlaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
92 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
106 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
120 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
124 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
127 float32x4_t vacc0123 = vmaxq_f32(vacc0123p0, vmin); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
136 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up8x9__neon_acc2() local
141 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neon_acc2()
[all …]
Dup8x9-neonfma-acc2.c56 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() local
64 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
78 vacc0123p0 = vfmaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
92 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
106 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
120 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
124 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
127 float32x4_t vacc0123 = vmaxq_f32(vacc0123p0, vmin); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
136 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2() local
141 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2()
[all …]
Dup8x9-sse-acc2.c56 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() local
66 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
84 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
102 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
120 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
138 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
144 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
147 __m128 vacc0123 = _mm_max_ps(vacc0123p0, vmin); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
157 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2() local
163 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x9__sse_acc2()
[all …]
Dup8x9-psimd-acc2.c56 psimd_f32 vacc0123p0 = psimd_load_f32(w); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() local
66 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
84 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
102 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
120 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
138 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
144 vacc0123p0 = psimd_add_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
147 psimd_f32 vacc0123 = psimd_max_f32(vacc0123p0, vmin); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
157 psimd_f32 vacc0123p0 = psimd_load_f32(w); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2() local
163 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up8x9__psimd_acc2()
[all …]
Dup4x9-psimd-acc2.c56 psimd_f32 vacc0123p0 = psimd_load_f32(w); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() local
63 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2()
75 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2()
87 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2()
99 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2()
111 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2()
116 vacc0123p0 = psimd_add_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2()
118 psimd_f32 vacc0123 = psimd_max_f32(vacc0123p0, vmin); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2()
125 psimd_f32 vacc0123p0 = psimd_load_f32(w); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2() local
129 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up4x9__psimd_acc2()
[all …]
Dup4x9-sse-acc2.c56 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() local
63 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
75 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
87 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
99 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
111 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
116 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
118 __m128 vacc0123 = _mm_max_ps(vacc0123p0, vmin); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
125 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2() local
129 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up4x9__sse_acc2()
[all …]
Dup4x4-psimd.c46 psimd_f32 vacc0123p0 = psimd_load_f32(w); in xnn_f32_dwconv_ukernel_up4x4__psimd() local
53 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up4x4__psimd()
59 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x4__psimd()
65 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up4x4__psimd()
71 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_ukernel_up4x4__psimd()
76 psimd_f32 vacc0123 = psimd_max_f32(vacc0123p0, vmin); in xnn_f32_dwconv_ukernel_up4x4__psimd()
83 psimd_f32 vacc0123p0 = psimd_load_f32(w); in xnn_f32_dwconv_ukernel_up4x4__psimd() local
87 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_ukernel_up4x4__psimd()
91 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x4__psimd()
95 vacc0123p0 = psimd_qfma_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_ukernel_up4x4__psimd()
[all …]

12