/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x88.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88():
    218 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    219 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    220 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    221 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    222 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    223 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    224 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    225 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    226 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    227 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x96.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96():
    231 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    232 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    233 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    234 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    235 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    236 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    237 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    238 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    239 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    240 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x80.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80():
    205 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    206 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    207 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    208 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    209 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    210 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    211 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    212 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    213 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    214 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x72.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72():
    192 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    193 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    194 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    195 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    196 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    197 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    198 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    199 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    200 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    263 vf = _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);
    [all …]
|
D | avx2-p5-x64.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64():
    179 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    180 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    181 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    182 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    183 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    184 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    185 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    186 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    247 vf = _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);
    297 vf = _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);
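Every match above is the same flush-to-zero guard: _mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS) builds an all-ones mask in the lanes where the shifted input (x minus the row maximum) is below the denormal cutoff, and _mm256_andnot_ps clears exactly those lanes of the computed exponential, so results that would underflow to denormals become +0.0f instead. Below is a minimal standalone sketch of the idiom, not code from these files: it assumes an AVX-capable compiler (e.g. gcc/clang with -mavx), and the cutoff constant is illustrative, chosen near the usual single-precision exp() underflow bound of about -87.34.

    /* Minimal sketch of the denorm-cutoff masking idiom (illustrative values). */
    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
      /* Assumed cutoff near the float exp() underflow bound; illustrative only. */
      const __m256 vdenorm_cutoff = _mm256_set1_ps(-0x1.5D589Ep6f);  /* ~ -87.34f */
      __m256 vx = _mm256_setr_ps(-100.0f, -90.0f, -87.0f, -10.0f,
                                 0.0f, 1.0f, -88.0f, -50.0f);
      __m256 vf = _mm256_set1_ps(0.125f);  /* stand-in for the computed exp(vx) lanes */

      /* The compare yields all-ones where vx < cutoff; andnot(mask, vf) computes
       * (~mask) & vf, which zeroes exactly those lanes of vf. */
      vf = _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);

      float out[8];
      _mm256_storeu_ps(out, vf);
      for (int i = 0; i < 8; i++) {
        printf("lane %d: %g\n", i, out[i]);  /* lanes 0, 1, 6 print 0 */
      }
      return 0;
    }

The branch-free mask avoids per-lane conditionals and keeps the kernels' unrolled bodies (vf0…vf9) a straight sequence of identical vector operations.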
|
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx2-p5-x96-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3():
    231 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    232 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    233 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    234 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    235 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    236 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    237 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    238 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    239 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    240 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x96-acc6.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6():
    234 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    235 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    236 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    237 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    238 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    239 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    240 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    241 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    242 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    243 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x96.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96():
    229 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    230 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    231 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    232 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    233 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    234 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    235 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    236 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    237 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    238 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x96-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2():
    230 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    231 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    232 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    233 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    234 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    235 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    236 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    237 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    238 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    239 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x80.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80():
    203 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    204 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    205 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    206 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    207 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    208 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    209 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    210 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    211 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    212 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x80-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2():
    204 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    205 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    206 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    207 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    208 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    209 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    210 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    211 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    212 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    213 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x72.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72():
    190 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    191 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    192 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    193 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    194 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    195 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    196 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    197 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    198 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    251 vf = _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);
    [all …]
|
D | avx2-p5-x72-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3():
    192 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    193 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    194 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    195 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    196 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    197 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    198 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    199 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    200 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    256 vf = _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);
    [all …]
|
D | avx2-p5-x80-acc5.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5():
    207 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    208 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    209 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    210 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    211 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    212 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    213 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    214 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    215 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    216 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x64.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64():
    177 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    178 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    179 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    180 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    181 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    182 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    183 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    184 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    236 vf = _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);
    282 vf = _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);
|
D | avx2-p5-x64-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2():
    178 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    179 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    180 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    181 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    182 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    183 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    184 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    185 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    239 vf = _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);
    285 vf = _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);
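The unindexed vf matches (e.g. lines 239 and 285 in avx2-p5-x64-acc2.c) appear to be the same guard inside the kernels' tail loops, which handle whatever elements remain after the unrolled main body; the numbered vf0…vf9 matches come from that unrolled body, one per 8-float AVX2 register.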
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx2-p5-x96-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2():
    231 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    232 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    233 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    234 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    235 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    236 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    237 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    238 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    239 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    240 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x96-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3():
    232 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    233 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    234 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    235 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    236 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    237 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    238 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    239 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    240 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    241 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x96-acc6.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6():
    235 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    236 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    237 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    238 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    239 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    240 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    241 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    242 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    243 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    244 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x96.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96():
    230 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    231 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    232 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    233 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    234 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    235 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    236 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    237 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    238 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    239 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x80-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5():
    208 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    209 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    210 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    211 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    212 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    213 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    214 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    215 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    216 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    217 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x80.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80():
    204 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    205 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    206 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    207 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    208 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    209 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    210 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    211 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    212 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    213 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x80-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc2():
    205 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    206 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    207 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    208 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    209 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    210 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    211 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    212 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    213 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vx8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    214 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
|
D | avx2-p5-x64-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2():
    179 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    180 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vx1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    181 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vx2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    182 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vx3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    183 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vx4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    184 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vx5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    185 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    186 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    251 vf = _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);
    301 vf = _mm256_andnot_ps(_mm256_cmp_ps(vx, vdenorm_cutoff, _CMP_LT_OS), vf);
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx2-rr1-p5-div-x80.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80():
    225 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
    226 vf1 = _mm256_andnot_ps(_mm256_cmp_ps(vz1, vdenorm_cutoff, _CMP_LT_OS), vf1);
    227 vf2 = _mm256_andnot_ps(_mm256_cmp_ps(vz2, vdenorm_cutoff, _CMP_LT_OS), vf2);
    228 vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vz3, vdenorm_cutoff, _CMP_LT_OS), vf3);
    229 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4);
    230 vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);
    231 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vz6, vdenorm_cutoff, _CMP_LT_OS), vf6);
    232 vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7);
    233 vf8 = _mm256_andnot_ps(_mm256_cmp_ps(vz8, vdenorm_cutoff, _CMP_LT_OS), vf8);
    234 vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vz9, vdenorm_cutoff, _CMP_LT_OS), vf9);
    [all …]
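The sigmoid kernels apply the identical guard but compare vz rather than vx: in these rr1 variants vz appears to be the sign-normalized (non-positive) input on which exp() is actually evaluated, so the flush to zero happens on that symmetric half before the result is reflected back for positive inputs.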
|