/external/XNNPACK/src/f32-raddextexp/gen/

D | avx512f-p5-scalef-x192-acc6.c | matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6():
  219  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
  220  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
  221  vaccv2 = _mm512_scalef_ps(vaccv2, vdelta_acce2);
  222  vaccv3 = _mm512_scalef_ps(vaccv3, vdelta_acce3);
  223  vaccv4 = _mm512_scalef_ps(vaccv4, vdelta_acce4);
  224  vaccv5 = _mm512_scalef_ps(vaccv5, vdelta_acce5);
  225  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
  226  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
  227  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2));
  228  vaccv3 = _mm512_add_ps(vaccv3, _mm512_scalef_ps(vp3, vdelta_e3));
  [all …]

D | avx512f-p5-scalef-x160-acc5.c | matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5():
  194  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
  195  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
  196  vaccv2 = _mm512_scalef_ps(vaccv2, vdelta_acce2);
  197  vaccv3 = _mm512_scalef_ps(vaccv3, vdelta_acce3);
  198  vaccv4 = _mm512_scalef_ps(vaccv4, vdelta_acce4);
  199  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
  200  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
  201  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2));
  202  vaccv3 = _mm512_add_ps(vaccv3, _mm512_scalef_ps(vp3, vdelta_e3));
  203  vaccv4 = _mm512_add_ps(vaccv4, _mm512_scalef_ps(vp4, vdelta_e4));
  [all …]

D | avx512f-p5-scalef-x128-acc4.c | matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4():
  169  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
  170  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
  171  vaccv2 = _mm512_scalef_ps(vaccv2, vdelta_acce2);
  172  vaccv3 = _mm512_scalef_ps(vaccv3, vdelta_acce3);
  173  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
  174  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
  175  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2));
  176  vaccv3 = _mm512_add_ps(vaccv3, _mm512_scalef_ps(vp3, vdelta_e3));
  177  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
  178  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp5, vdelta_e5));
  [all …]

D | avx512f-p5-scalef-x144-acc3.c | matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3():
  177  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
  178  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
  179  vaccv2 = _mm512_scalef_ps(vaccv2, vdelta_acce2);
  180  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
  181  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
  182  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2));
  183  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp3, vdelta_e3));
  184  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp4, vdelta_e4));
  185  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp5, vdelta_e5));
  186  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
  [all …]

D | avx512f-p5-scalef-x192-acc3.c | matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3():
  210  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
  211  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
  212  vaccv2 = _mm512_scalef_ps(vaccv2, vdelta_acce2);
  213  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
  214  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
  215  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp2, vdelta_e2));
  216  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp3, vdelta_e3));
  217  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp4, vdelta_e4));
  218  vaccv2 = _mm512_add_ps(vaccv2, _mm512_scalef_ps(vp5, vdelta_e5));
  219  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
  [all …]

D | avx512f-p5-scalef-x128-acc2.c | matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2():
  163  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
  164  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
  165  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
  166  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
  167  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2));
  168  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp3, vdelta_e3));
  169  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
  170  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp5, vdelta_e5));
  171  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
  172  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp7, vdelta_e7));
  [all …]

D | avx512f-p5-scalef-x160-acc2.c | matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2():
  185  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
  186  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
  187  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
  188  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
  189  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2));
  190  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp3, vdelta_e3));
  191  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
  192  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp5, vdelta_e5));
  193  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
  194  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp7, vdelta_e7));
  [all …]

D | avx512f-p5-scalef-x192-acc2.c | matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2():
  207  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
  208  vaccv1 = _mm512_scalef_ps(vaccv1, vdelta_acce1);
  209  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
  210  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp1, vdelta_e1));
  211  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2));
  212  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp3, vdelta_e3));
  213  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
  214  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp5, vdelta_e5));
  215  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
  216  vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp7, vdelta_e7));
  [all …]

D | avx512f-p5-scalef-x192.c | matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192():
  204  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
  205  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
  206  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp1, vdelta_e1));
  207  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2));
  208  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp3, vdelta_e3));
  209  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
  210  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp5, vdelta_e5));
  211  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
  212  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp7, vdelta_e7));
  213  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp8, vdelta_e8));
  [all …]

D | avx512f-p5-scalef-x160.c | matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160():
  182  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
  183  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
  184  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp1, vdelta_e1));
  185  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2));
  186  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp3, vdelta_e3));
  187  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
  188  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp5, vdelta_e5));
  189  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
  190  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp7, vdelta_e7));
  191  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp8, vdelta_e8));
  [all …]

D | avx512f-p5-scalef-x144.c | matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144():
  171  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
  172  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
  173  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp1, vdelta_e1));
  174  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2));
  175  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp3, vdelta_e3));
  176  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
  177  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp5, vdelta_e5));
  178  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
  179  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp7, vdelta_e7));
  180  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp8, vdelta_e8));
  [all …]

D | avx512f-p5-scalef-x128.c | matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128():
  160  vaccv0 = _mm512_scalef_ps(vaccv0, vdelta_acce0);
  161  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp0, vdelta_e0));
  162  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp1, vdelta_e1));
  163  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp2, vdelta_e2));
  164  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp3, vdelta_e3));
  165  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));
  166  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp5, vdelta_e5));
  167  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
  168  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp7, vdelta_e7));
  201  vaccv = _mm512_scalef_ps(vaccv, vdelta_acce);
  [all …]

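All of the f32-raddextexp matches above follow one pattern. _mm512_scalef_ps(a, b) multiplies each lane of a by 2 raised to floor(b), so a running sum can be carried as a mantissa/exponent pair (vaccv, vacce) representing vaccv * 2^vacce; before a new term vp * 2^ve is added, accumulator and term are both rebased to a shared exponent so the float32 addition can neither overflow nor lose the larger operand. Below is a minimal sketch of one such accumulation step; it is illustrative only, with hypothetical names, not the XNNPACK source:

  #include <immintrin.h>

  /* Illustrative sketch, not the XNNPACK source; all names are hypothetical.
   * The running sum is represented as vaccv * 2^vacce. */
  static inline void raddextexp_step(__m512 *vaccv, __m512 *vacce,
                                     __m512 vp, __m512 ve)
  {
    /* New common exponent: the elementwise maximum of the two exponents. */
    const __m512 vmax_e = _mm512_max_ps(*vacce, ve);
    /* Both deltas are <= 0, so the scalef calls below only scale down. */
    const __m512 vdelta_acce = _mm512_sub_ps(*vacce, vmax_e);
    const __m512 vdelta_e    = _mm512_sub_ps(ve, vmax_e);
    /* vaccv = vaccv * 2^vdelta_acce + vp * 2^vdelta_e, as in the matches above. */
    *vaccv = _mm512_scalef_ps(*vaccv, vdelta_acce);
    *vaccv = _mm512_add_ps(*vaccv, _mm512_scalef_ps(vp, vdelta_e));
    *vacce = vmax_e;
  }
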
/external/XNNPACK/src/f32-raddexpminusmax/gen/

D | avx512f-p5-scalef-x192-acc2.c | matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2():
  182  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
  183  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
  184  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
  185  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
  186  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
  187  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
  188  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
  189  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
  190  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
  191  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
  [all …]

D | avx512f-p5-scalef-x192.c | matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192():
  181  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
  182  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
  183  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
  184  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
  185  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
  186  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
  187  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
  188  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
  189  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
  190  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
  [all …]

D | avx512f-p5-scalef-x192-acc3.c | matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3():
  183  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
  184  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
  185  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
  186  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
  187  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
  188  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
  189  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
  190  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
  191  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
  192  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
  [all …]

D | avx512f-p5-scalef-x192-acc6.c | matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6():
  186  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
  187  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
  188  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
  189  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
  190  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
  191  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
  192  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
  193  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
  194  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
  195  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
  [all …]

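The f32-raddexpminusmax matches all reconstruct e^(x - max) as vf = vp * 2^vn, where vn is (x - max) * log2(e) rounded to an integer and vp is a degree-5 polynomial (the "p5" in the file names) in the reduced argument. A minimal sketch of that reconstruction follows; the plain Taylor coefficients stand in for the library's tuned ones, and all names are hypothetical:

  #include <immintrin.h>

  /* Illustrative sketch, not the XNNPACK source. */
  static inline __m512 exp_minus_max(__m512 vx, __m512 vmax)
  {
    const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f);  /* log2(e) */
    const __m512 vln2   = _mm512_set1_ps(0x1.62E430p-1f);  /* ln(2)   */

    const __m512 vt = _mm512_sub_ps(vx, vmax);  /* t = x - max, t <= 0 */
    /* n = round(t * log2(e)), so e^t = 2^n * e^r with r = t - n*ln2. */
    const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vt, vlog2e), 0);
    const __m512 vr = _mm512_fnmadd_ps(vn, vln2, vt);

    /* Degree-5 polynomial for e^r on the reduced range (Horner form). */
    __m512 vp = _mm512_set1_ps(0x1.111112p-7f);                    /* ~1/120 */
    vp = _mm512_fmadd_ps(vp, vr, _mm512_set1_ps(0x1.555556p-5f));  /* ~1/24  */
    vp = _mm512_fmadd_ps(vp, vr, _mm512_set1_ps(0x1.555556p-3f));  /* ~1/6   */
    vp = _mm512_fmadd_ps(vp, vr, _mm512_set1_ps(0x1p-1f));         /* 1/2    */
    vp = _mm512_fmadd_ps(vp, vr, _mm512_set1_ps(1.0f));            /* 1      */
    vp = _mm512_fmadd_ps(vp, vr, _mm512_set1_ps(1.0f));            /* 1      */

    /* vf = vp * 2^vn: the _mm512_scalef_ps step shown in every match above. */
    return _mm512_scalef_ps(vp, vn);
  }

Using scalef for the final step avoids the usual integer exponent-bit manipulation, which is why these kernels need no special-casing for the subnormal and overflow edges of that trick.
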
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

D | avx512f-p5-scalef-x192.c | matches in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192():
  182  __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
  183  __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
  184  __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
  185  __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
  186  __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
  187  __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
  188  __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
  189  __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
  190  __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
  191  __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
  [all …]

D | avx512f-p5-scalef-x176.c | matches in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176():
  172  __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
  173  __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
  174  __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
  175  __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
  176  __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
  177  __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
  178  __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
  179  __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
  180  __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
  181  __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
  [all …]

D | avx512f-p5-scalef-x160.c | matches in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160():
  162  __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
  163  __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
  164  __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
  165  __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
  166  __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
  167  __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
  168  __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
  169  __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
  170  __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
  171  __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
  [all …]

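The f32-vscaleexpminusmax kernels reconstruct vf the same way, but instead of summing they multiply by a caller-supplied scale (for softmax, the reciprocal of the sum of exponentials) and store the result. A minimal sketch of that tail step, with hypothetical names:

  #include <immintrin.h>

  /* Illustrative sketch, hypothetical names: scale one 16-lane block of the
   * reconstructed e^(x - max) values and store it. */
  static inline void scale_and_store(float *out, __m512 vp, __m512 vn,
                                     __m512 vscale)
  {
    __m512 vf = _mm512_scalef_ps(vp, vn);  /* e^(x - max) = p * 2^n     */
    vf = _mm512_mul_ps(vf, vscale);        /* e.g. multiply by 1/sum    */
    _mm512_storeu_ps(out, vf);
  }
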
/external/XNNPACK/src/f32-vscaleextexp/gen/

D | avx512f-p5-scalef-x192.c | matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192():
  199  vf0 = _mm512_scalef_ps(vf0, ve0);
  200  vf1 = _mm512_scalef_ps(vf1, ve1);
  201  vf2 = _mm512_scalef_ps(vf2, ve2);
  202  vf3 = _mm512_scalef_ps(vf3, ve3);
  203  vf4 = _mm512_scalef_ps(vf4, ve4);
  204  vf5 = _mm512_scalef_ps(vf5, ve5);
  205  vf6 = _mm512_scalef_ps(vf6, ve6);
  206  vf7 = _mm512_scalef_ps(vf7, ve7);
  207  vf8 = _mm512_scalef_ps(vf8, ve8);
  208  vf9 = _mm512_scalef_ps(vf9, ve9);
  [all …]

D | avx512f-p5-scalef-x176.c | matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176():
  188  vf0 = _mm512_scalef_ps(vf0, ve0);
  189  vf1 = _mm512_scalef_ps(vf1, ve1);
  190  vf2 = _mm512_scalef_ps(vf2, ve2);
  191  vf3 = _mm512_scalef_ps(vf3, ve3);
  192  vf4 = _mm512_scalef_ps(vf4, ve4);
  193  vf5 = _mm512_scalef_ps(vf5, ve5);
  194  vf6 = _mm512_scalef_ps(vf6, ve6);
  195  vf7 = _mm512_scalef_ps(vf7, ve7);
  196  vf8 = _mm512_scalef_ps(vf8, ve8);
  197  vf9 = _mm512_scalef_ps(vf9, ve9);
  [all …]

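In the f32-vscaleextexp matches the second _mm512_scalef_ps operand is ve rather than vn: the scale is supplied in extended form as vscalev * 2^vscalee, and its exponent is folded into each term's exponent, so scales far outside the normal float32 range still work. A sketch of the idea under those assumptions, names hypothetical (a production version would also clamp the combined exponent to keep the result representable; that is omitted here):

  #include <immintrin.h>

  /* Illustrative sketch, hypothetical names: apply a scale supplied in
   * extended-exponent form (vscalev * 2^vscalee) to a term vf * 2^vn. */
  static inline __m512 scale_ext_exp(__m512 vf, __m512 vn,
                                     __m512 vscalev, __m512 vscalee)
  {
    const __m512 ve = _mm512_add_ps(vn, vscalee);  /* combined exponent   */
    vf = _mm512_mul_ps(vf, vscalev);               /* mantissa of scale   */
    return _mm512_scalef_ps(vf, ve);               /* vf * 2^ve           */
  }
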
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

D | avx512f-p5-scalef-x192-acc2.c | matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2():
  183  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
  184  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
  185  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
  186  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
  187  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
  188  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
  189  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
  190  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
  191  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
  192  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
  [all …]

D | avx512f-p5-scalef-x192-acc3.c | matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3():
  184  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
  185  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
  186  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
  187  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
  188  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
  189  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
  190  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
  191  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
  192  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
  193  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
  [all …]

D | avx512f-p5-scalef-x192.c | matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192():
  182  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
  183  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
  184  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
  185  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
  186  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
  187  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
  188  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
  189  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
  190  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
  191  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
  [all …]

D | avx512f-p5-scalef-x160-acc2.c | matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2():
  163  const __m512 vf0 = _mm512_scalef_ps(vp0, vn0);
  164  const __m512 vf1 = _mm512_scalef_ps(vp1, vn1);
  165  const __m512 vf2 = _mm512_scalef_ps(vp2, vn2);
  166  const __m512 vf3 = _mm512_scalef_ps(vp3, vn3);
  167  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);
  168  const __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
  169  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
  170  const __m512 vf7 = _mm512_scalef_ps(vp7, vn7);
  171  const __m512 vf8 = _mm512_scalef_ps(vp8, vn8);
  172  const __m512 vf9 = _mm512_scalef_ps(vp9, vn9);
  [all …]

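Finally, the f32-raddstoreexpminusmax kernels combine the two patterns above: each reconstructed block of e^(x - max) is stored to the output and simultaneously added into a running sum, giving a softmax its numerators and its denominator in a single pass. A minimal per-block sketch, names hypothetical:

  #include <immintrin.h>

  /* Illustrative sketch, hypothetical names: store one reconstructed block
   * of e^(x - max) and fold it into the running sum in the same pass. */
  static inline __m512 raddstore_step(float *out, __m512 vp, __m512 vn,
                                      __m512 vacc)
  {
    const __m512 vf = _mm512_scalef_ps(vp, vn);  /* e^(x - max)             */
    _mm512_storeu_ps(out, vf);                   /* softmax numerator block */
    return _mm512_add_ps(vacc, vf);              /* accumulate denominator  */
  }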