Home
last modified time | relevance | path

Searched refs: vp4 (Results 1 – 25 of 129) sorted by relevance

Pages: 1 2 3 4 5 6

/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
Davx512f-p5-scalef-x80.c83 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80() local
89 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
95 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
101 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
107 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
116 __m512 vf4 = _mm512_scalef_ps(vp4, vn4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
Davx512f-p5-scalef-x96.c88 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96() local
95 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
102 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
109 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
116 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
126 __m512 vf4 = _mm512_scalef_ps(vp4, vn4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
Davx512f-p5-scalef-x112.c93 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112() local
101 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
109 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
117 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
125 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
136 __m512 vf4 = _mm512_scalef_ps(vp4, vn4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
Davx512f-p5-scalef-x128.c98 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128() local
107 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128()
116 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128()
125 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128()
134 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128()
146 __m512 vf4 = _mm512_scalef_ps(vp4, vn4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128()
Davx512f-p5-scalef-x144.c103 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() local
113 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144()
123 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144()
133 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144()
143 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144()
156 __m512 vf4 = _mm512_scalef_ps(vp4, vn4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144()
/external/XNNPACK/src/f32-vscaleextexp/gen/
Davx512f-p5-scalef-x80.c77 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80() local
83 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
89 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
95 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
101 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
113 __m512 vf4 = _mm512_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
Davx512f-p5-scalef-x96.c81 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96() local
88 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
95 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
102 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
109 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
122 __m512 vf4 = _mm512_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
Davx2-p5-x40.c83 __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() local
89 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
95 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
101 vp4 = _mm256_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
107 vp4 = _mm256_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
119 __m256 vf4 = _mm256_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
Davx512f-p5-scalef-x112.c85 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112() local
93 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
101 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
109 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
117 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
131 __m512 vf4 = _mm512_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
Davx512f-p5-scalef-x128.c89 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() local
98 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
107 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
116 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
125 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
140 __m512 vf4 = _mm512_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
Davx2-p5-x48.c87 __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() local
94 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48()
101 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48()
108 vp4 = _mm256_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48()
115 vp4 = _mm256_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48()
128 __m256 vf4 = _mm256_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48()
Davx512f-p5-scalef-x144.c93 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() local
103 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
113 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
123 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
133 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
149 __m512 vf4 = _mm512_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
Davx2-p5-x56.c91 __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() local
99 vp4 = _mm256_fmadd_ps(vp4, vt4, vc3); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56()
107 vp4 = _mm256_fmadd_ps(vp4, vt4, vc2); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56()
115 vp4 = _mm256_fmadd_ps(vp4, vt4, vc1); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56()
123 vp4 = _mm256_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56()
137 __m256 vf4 = _mm256_mul_ps(vp4, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56()
/external/XNNPACK/src/f32-raddexpminusmax/gen/
Davx512f-p5-scalef-x128-acc2.c98 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2() local
107 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
116 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
125 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
134 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
146 const __m512 vf4 = _mm512_scalef_ps(vp4, vn4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
Davx512f-p5-scalef-x128.c97 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128() local
106 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128()
115 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128()
124 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128()
133 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128()
145 const __m512 vf4 = _mm512_scalef_ps(vp4, vn4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128()
Davx512f-p5-scalef-x128-acc4.c100 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4() local
109 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
118 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
127 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
136 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
148 const __m512 vf4 = _mm512_scalef_ps(vp4, vn4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
Davx512f-p5-scalef-x144-acc3.c104 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() local
114 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
124 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
134 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
144 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
157 const __m512 vf4 = _mm512_scalef_ps(vp4, vn4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
Davx512f-p5-scalef-x144.c102 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() local
112 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144()
122 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144()
132 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144()
142 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144()
155 const __m512 vf4 = _mm512_scalef_ps(vp4, vn4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144()
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Davx512f-p5-scalef-x128-acc2.c99 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2() local
108 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
117 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
126 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
135 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
147 const __m512 vf4 = _mm512_scalef_ps(vp4, vn4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
Davx512f-p5-scalef-x128.c98 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128() local
107 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128()
116 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128()
125 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128()
134 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128()
146 const __m512 vf4 = _mm512_scalef_ps(vp4, vn4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128()
Davx512f-p5-scalef-x144-acc3.c105 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() local
115 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
125 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
135 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
145 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
158 const __m512 vf4 = _mm512_scalef_ps(vp4, vn4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
Davx512f-p5-scalef-x128-acc4.c101 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4() local
110 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
119 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
128 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
137 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
149 const __m512 vf4 = _mm512_scalef_ps(vp4, vn4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
Davx512f-p5-scalef-x144.c103 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() local
113 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144()
123 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144()
133 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144()
143 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144()
156 const __m512 vf4 = _mm512_scalef_ps(vp4, vn4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144()
/external/XNNPACK/src/f32-raddextexp/gen/
Davx512f-p5-scalef-x128.c89 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128() local
98 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
107 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
116 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
125 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
165 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
Davx512f-p5-scalef-x128-acc2.c91 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() local
100 vp4 = _mm512_fmadd_ps(vp4, vt4, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
109 vp4 = _mm512_fmadd_ps(vp4, vt4, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
118 vp4 = _mm512_fmadd_ps(vp4, vt4, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
127 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
169 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()

Pages: 1 2 3 4 5 6