/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x96.c | 227 __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() local 242 vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 256 vf11 = _mm256_mul_ps(vf11, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 270 _mm256_storeu_ps(output + 88, vf11); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
|
D | avx512f-p5-scalef-x192.c | 193 __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() local 207 vf11 = _mm512_mul_ps(vf11, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 222 _mm512_storeu_ps(output + 176, vf11); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx2-p5-x96-acc2.c | 227 __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() local 242 vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() 256 _mm256_storeu_ps(output + 88, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() 271 vacc1 = _mm256_add_ps(vacc1, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
|
D | avx2-p5-x96-acc3.c | 228 __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3() local 243 vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3() 257 _mm256_storeu_ps(output + 88, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3() 272 vacc2 = _mm256_add_ps(vacc2, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
|
D | avx2-p5-x96-acc6.c | 231 __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() local 246 vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() 260 _mm256_storeu_ps(output + 88, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() 275 vacc5 = _mm256_add_ps(vacc5, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
|
D | avx2-p5-x96.c | 226 __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() local 241 vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() 255 _mm256_storeu_ps(output + 88, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() 270 vacc0 = _mm256_add_ps(vacc0, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
|
D | avx512f-p5-scalef-x192.c | 193 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() local 207 _mm512_storeu_ps(output + 176, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 222 vacc0 = _mm512_add_ps(vacc0, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
D | avx512f-p5-scalef-x192-acc3.c | 195 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local 209 _mm512_storeu_ps(output + 176, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 224 vacc2 = _mm512_add_ps(vacc2, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx512f-p5-scalef-x192-acc2.c | 194 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local 208 _mm512_storeu_ps(output + 176, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 223 vacc1 = _mm512_add_ps(vacc1, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx512f-p5-scalef-x192-acc6.c | 198 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() local 212 _mm512_storeu_ps(output + 176, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 227 vacc5 = _mm512_add_ps(vacc5, vf11); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x192.c | 183 __m512 vf11 = _mm512_mul_ps(vp11, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() local 210 vf11 = _mm512_scalef_ps(vf11, ve11); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 225 _mm512_storeu_ps(y + 176, vf11); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
|
D | avx2-p5-x96.c | 189 __m256 vf11 = _mm256_mul_ps(vp11, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() local 249 vf11 = _mm256_mul_ps(vf11, vs11); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 263 _mm256_storeu_ps(y + 88, vf11); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
|
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx2-p5-x96-acc3.c | 227 __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() local 242 vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() 256 vacc2 = _mm256_add_ps(vacc2, vf11); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
|
D | avx2-p5-x96-acc6.c | 230 __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() local 245 vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 259 vacc5 = _mm256_add_ps(vacc5, vf11); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
|
D | avx2-p5-x96.c | 225 __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() local 240 vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 254 vacc0 = _mm256_add_ps(vacc0, vf11); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
|
D | avx2-p5-x96-acc2.c | 226 __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() local 241 vf11 = _mm256_andnot_ps(_mm256_cmp_ps(vx11, vdenorm_cutoff, _CMP_LT_OS), vf11); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 255 vacc1 = _mm256_add_ps(vacc1, vf11); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
|
D | avx512f-p5-scalef-x192-acc6.c | 197 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() local 211 vacc5 = _mm512_add_ps(vacc5, vf11); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
|
D | avx512f-p5-scalef-x192-acc3.c | 194 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local 208 vacc2 = _mm512_add_ps(vacc2, vf11); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx512f-p5-scalef-x192.c | 192 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() local 206 vacc0 = _mm512_add_ps(vacc0, vf11); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
D | avx512f-p5-scalef-x192-acc2.c | 193 const __m512 vf11 = _mm512_scalef_ps(vp11, vn11); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local 207 vacc1 = _mm512_add_ps(vacc1, vf11); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
|