Searched refs:_mm512_scalef_ps (Results 1 – 25 of 146) sorted by relevance
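
Note on the searched symbol: _mm512_scalef_ps(a, b) is the AVX-512F intrinsic for the VSCALEFPS instruction; per lane it computes a[i] * 2^floor(b[i]), i.e. it applies a floating-point exponent held in one vector to the values in another. The hits on this page all come from XNNPACK's generated exp-family microkernels, which use it to fold a power-of-two exponent back into a polynomial result. A minimal standalone example (not taken from the results; names and values are illustrative):

    #include <immintrin.h>
    #include <stdio.h>

    /* Each lane of the result is a[i] * 2^floor(b[i]).
       Build with -mavx512f and run on an AVX-512F capable CPU. */
    int main(void) {
      const __m512 va = _mm512_set1_ps(1.5f);       /* values to scale */
      const __m512 vb = _mm512_set1_ps(3.0f);       /* exponents */
      const __m512 vr = _mm512_scalef_ps(va, vb);   /* 1.5 * 2^3 = 12.0 per lane */

      float out[16];
      _mm512_storeu_ps(out, vr);
      printf("%f\n", out[0]);                       /* prints 12.000000 */
      return 0;
    }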

/external/XNNPACK/src/f32-raddextexp/gen/
avx512f-p5-scalef-x192-acc6.c  (in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6())
    219  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
    220  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
    221  vaccv2 = _mm512_scalef_ps(vaccv2, vdelta_acce2);
    222  vaccv3 = _mm512_scalef_ps(vaccv3, vdelta_acce3);
    223  vaccv4 = _mm512_scalef_ps(vaccv4, vdelta_acce4);
    224  vaccv5 = _mm512_scalef_ps(vaccv5, vdelta_acce5);
    225  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    226  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
    227  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2));
    228  vaccv3 = _mm512_add_ps(vaccv3, _mm512_scalef_ps(vp3, vdelta_e3));
    [all …]

avx512f-p5-scalef-x160-acc5.c  (in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5())
    194  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
    195  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
    196  vaccv2 = _mm512_scalef_ps(vaccv2, vdelta_acce2);
    197  vaccv3 = _mm512_scalef_ps(vaccv3, vdelta_acce3);
    198  vaccv4 = _mm512_scalef_ps(vaccv4, vdelta_acce4);
    199  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    200  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
    201  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2));
    202  vaccv3 = _mm512_add_ps(vaccv3, _mm512_scalef_ps(vp3, vdelta_e3));
    203  vaccv4 = _mm512_add_ps(vaccv4, _mm512_scalef_ps(vp4, vdelta_e4));
    [all …]

avx512f-p5-scalef-x128-acc4.c  (in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4())
    169  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
    170  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
    171  vaccv2 = _mm512_scalef_ps(vaccv2, vdelta_acce2);
    172  vaccv3 = _mm512_scalef_ps(vaccv3, vdelta_acce3);
    173  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    174  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
    175  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2));
    176  vaccv3 = _mm512_add_ps(vaccv3, _mm512_scalef_ps(vp3, vdelta_e3));
    177  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
    178  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp5, vdelta_e5));
    [all …]

avx512f-p5-scalef-x144-acc3.c  (in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3())
    177  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
    178  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
    179  vaccv2 = _mm512_scalef_ps(vaccv2, vdelta_acce2);
    180  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    181  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
    182  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2));
    183  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp3, vdelta_e3));
    184  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp4, vdelta_e4));
    185  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp5, vdelta_e5));
    186  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
    [all …]

avx512f-p5-scalef-x192-acc3.c  (in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3())
    210  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
    211  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
    212  vaccv2 = _mm512_scalef_ps(vaccv2, vdelta_acce2);
    213  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    214  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
    215  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2));
    216  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp3, vdelta_e3));
    217  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp4, vdelta_e4));
    218  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp5, vdelta_e5));
    219  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
    [all …]

avx512f-p5-scalef-x128-acc2.c  (in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2())
    163  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
    164  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
    165  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    166  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
    167  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2));
    168  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp3, vdelta_e3));
    169  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
    170  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp5, vdelta_e5));
    171  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
    172  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp7, vdelta_e7));
    [all …]

avx512f-p5-scalef-x160-acc2.c  (in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2())
    185  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
    186  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
    187  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    188  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
    189  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2));
    190  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp3, vdelta_e3));
    191  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
    192  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp5, vdelta_e5));
    193  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
    194  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp7, vdelta_e7));
    [all …]

avx512f-p5-scalef-x192-acc2.c  (in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2())
    207  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
    208  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
    209  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    210  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
    211  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2));
    212  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp3, vdelta_e3));
    213  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
    214  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp5, vdelta_e5));
    215  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
    216  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp7, vdelta_e7));
    [all …]

avx512f-p5-scalef-x192.c  (in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192())
    204  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
    205  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    206  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp1, vdelta_e1));
    207  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2));
    208  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp3, vdelta_e3));
    209  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
    210  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp5, vdelta_e5));
    211  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
    212  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp7, vdelta_e7));
    213  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp8, vdelta_e8));
    [all …]

avx512f-p5-scalef-x160.c  (in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160())
    182  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
    183  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    184  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp1, vdelta_e1));
    185  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2));
    186  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp3, vdelta_e3));
    187  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
    188  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp5, vdelta_e5));
    189  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
    190  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp7, vdelta_e7));
    191  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp8, vdelta_e8));
    [all …]

avx512f-p5-scalef-x144.c  (in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144())
    171  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
    172  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    173  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp1, vdelta_e1));
    174  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2));
    175  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp3, vdelta_e3));
    176  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
    177  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp5, vdelta_e5));
    178  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
    179  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp7, vdelta_e7));
    180  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp8, vdelta_e8));
    [all …]

avx512f-p5-scalef-x128.c  (in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128())
    160  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
    161  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
    162  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp1, vdelta_e1));
    163  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2));
    164  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp3, vdelta_e3));
    165  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
    166  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp5, vdelta_e5));
    167  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
    168  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp7, vdelta_e7));
    201  vaccv = _mm512_scalef_ps(vaccv, vdelta_acce);
    [all …]

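The f32-raddextexp hits above all show the same two-step pattern: each accumulator is kept in extended form as a mantissa vector (vaccv*) plus an exponent vector, and before a new batch of polynomial terms (vp*) is folded in, both sides appear to be rescaled to a common exponent via _mm512_scalef_ps using the exponent deltas (vdelta_acce*, vdelta_e*), so the running sum of exponentials cannot overflow. A rough scalar sketch of that idea, with illustrative names rather than XNNPACK's own:

    #include <math.h>

    /* Accumulator held in extended form: value = acc * 2^acce. */
    typedef struct { float acc; float acce; } extacc;

    /* Fold in one term p * 2^e: rescale both sides to the larger exponent and
       add, mirroring scalef(vaccv, vdelta_acce) + scalef(vp, vdelta_e) above. */
    void extacc_add(extacc* a, float p, float e) {
      const float max_e = fmaxf(a->acce, e);
      a->acc = ldexpf(a->acc, (int)(a->acce - max_e))   /* acc * 2^(acce - max_e) */
             + ldexpf(p, (int)(e - max_e));             /* p   * 2^(e    - max_e) */
      a->acce = max_e;
    }
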
/external/XNNPACK/src/f32-raddexpminusmax/gen/
avx512f-p5-scalef-x192-acc2.c  (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2())
    182  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
    183  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
    184  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
    185  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
    186  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
    187  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
    188  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
    189  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
    190  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
    191  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
    [all …]

avx512f-p5-scalef-x192.c  (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192())
    181  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
    182  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
    183  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
    184  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
    185  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
    186  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
    187  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
    188  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
    189  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
    190  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
    [all …]

avx512f-p5-scalef-x192-acc3.c  (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3())
    183  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
    184  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
    185  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
    186  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
    187  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
    188  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
    189  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
    190  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
    191  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
    192  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
    [all …]

avx512f-p5-scalef-x192-acc6.c  (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6())
    186  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
    187  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
    188  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
    189  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
    190  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
    191  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
    192  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
    193  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
    194  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
    195  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
    [all …]

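In the f32-raddexpminusmax kernels (and likewise in the f32-vscaleexpminusmax and f32-raddstoreexpminusmax kernels further down), the scalef hit appears to be the reconstruction step of the usual vectorized exp evaluation: x is split as x = n*ln(2) + r, a degree-5 polynomial ("p5" in the file names) approximates exp(r), and exp(x) = p(r) * 2^n is recovered with a single _mm512_scalef_ps(vp, vn). A rough sketch of that shape, using plain Taylor coefficients for illustration rather than the kernels' own constants:

    #include <immintrin.h>

    /* exp(x) ~= p(r) * 2^n with x = n*ln2 + r; build with -mavx512f. */
    __m512 exp_scalef_sketch(__m512 vx) {
      const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f);  /* log2(e) */
      const __m512 vln2   = _mm512_set1_ps(0x1.62E430p-1f);  /* ln(2)   */

      /* n = round(x / ln2), kept as a float vector */
      const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e),
          _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
      /* r = x - n*ln2 */
      const __m512 vr = _mm512_fnmadd_ps(vn, vln2, vx);

      /* Degree-5 polynomial for exp(r); Taylor coefficients, illustrative only. */
      __m512 vp = _mm512_set1_ps(1.0f / 120.0f);
      vp = _mm512_fmadd_ps(vp, vr, _mm512_set1_ps(1.0f / 24.0f));
      vp = _mm512_fmadd_ps(vp, vr, _mm512_set1_ps(1.0f / 6.0f));
      vp = _mm512_fmadd_ps(vp, vr, _mm512_set1_ps(0.5f));
      vp = _mm512_fmadd_ps(vp, vr, _mm512_set1_ps(1.0f));
      vp = _mm512_fmadd_ps(vp, vr, _mm512_set1_ps(1.0f));

      /* The step that shows up in the hits: exp(x) = p(r) * 2^n. */
      return _mm512_scalef_ps(vp, vn);
    }
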
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
avx512f-p5-scalef-x192.c  (in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192())
    182  __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
    183  __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
    184  __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
    185  __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
    186  __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
    187  __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
    188  __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
    189  __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
    190  __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
    191  __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
    [all …]

avx512f-p5-scalef-x176.c  (in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176())
    172  __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
    173  __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
    174  __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
    175  __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
    176  __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
    177  __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
    178  __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
    179  __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
    180  __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
    181  __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
    [all …]

avx512f-p5-scalef-x160.c  (in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160())
    162  __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
    163  __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
    164  __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
    165  __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
    166  __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
    167  __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
    168  __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
    169  __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
    170  __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
    171  __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
    [all …]

/external/XNNPACK/src/f32-vscaleextexp/gen/
avx512f-p5-scalef-x192.c  (in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192())
    199  vf0 = _mm512_scalef_ps(vf0, ve0);
    200  vf1 = _mm512_scalef_ps(vf1, ve1);
    201  vf2 = _mm512_scalef_ps(vf2, ve2);
    202  vf3 = _mm512_scalef_ps(vf3, ve3);
    203  vf4 = _mm512_scalef_ps(vf4, ve4);
    204  vf5 = _mm512_scalef_ps(vf5, ve5);
    205  vf6 = _mm512_scalef_ps(vf6, ve6);
    206  vf7 = _mm512_scalef_ps(vf7, ve7);
    207  vf8 = _mm512_scalef_ps(vf8, ve8);
    208  vf9 = _mm512_scalef_ps(vf9, ve9);
    [all …]

avx512f-p5-scalef-x176.c  (in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176())
    188  vf0 = _mm512_scalef_ps(vf0, ve0);
    189  vf1 = _mm512_scalef_ps(vf1, ve1);
    190  vf2 = _mm512_scalef_ps(vf2, ve2);
    191  vf3 = _mm512_scalef_ps(vf3, ve3);
    192  vf4 = _mm512_scalef_ps(vf4, ve4);
    193  vf5 = _mm512_scalef_ps(vf5, ve5);
    194  vf6 = _mm512_scalef_ps(vf6, ve6);
    195  vf7 = _mm512_scalef_ps(vf7, ve7);
    196  vf8 = _mm512_scalef_ps(vf8, ve8);
    197  vf9 = _mm512_scalef_ps(vf9, ve9);
    [all …]

/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
avx512f-p5-scalef-x192-acc2.c  (in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2())
    183  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
    184  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
    185  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
    186  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
    187  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
    188  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
    189  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
    190  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
    191  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
    192  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
    [all …]

avx512f-p5-scalef-x192-acc3.c  (in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3())
    184  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
    185  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
    186  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
    187  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
    188  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
    189  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
    190  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
    191  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
    192  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
    193  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
    [all …]

avx512f-p5-scalef-x192.c  (in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192())
    182  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
    183  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
    184  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
    185  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
    186  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
    187  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
    188  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
    189  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
    190  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
    191  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
    [all …]

avx512f-p5-scalef-x160-acc2.c  (in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2())
    163  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
    164  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
    165  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
    166  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
    167  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
    168  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
    169  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
    170  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
    171  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
    172  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
    [all …]
