
Searched refs:_mm256_fmadd_ps (Results 1 – 25 of 356) sorted by relevance
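
Every hit below comes from XNNPACK's generated AVX2 exp-style kernels, where _mm256_fmadd_ps(a, b, c) is the FMA intrinsic that computes a*b + c on eight packed floats in a single rounding step. The matched lines are the argument-reduction stage of those kernels. A minimal sketch of that stage, assuming only <immintrin.h> and FMA support; the variable names mirror the hits, but the constants (a classic hi/lo split of ln 2) are illustrative rather than XNNPACK's exact values:

#include <immintrin.h>  /* AVX2 + FMA intrinsics; compile with -mavx2 -mfma */

/* Sketch of the reduction step the hits below perform:
 * n = round(x * log2(e)) via the magic-bias trick, then t = x - n * ln(2)
 * using a two-term (hi/lo) split of ln(2) for extra precision. */
static inline __m256 reduce_exp_arg(__m256 vx) {
  const __m256 vlog2e        = _mm256_set1_ps(1.442695041f);    /* log2(e) */
  const __m256 vmagic_bias   = _mm256_set1_ps(12582912.0f);     /* 1.5 * 2^23: adding it rounds to nearest integer */
  const __m256 vminus_ln2_hi = _mm256_set1_ps(-0.693359375f);   /* high part of -ln(2), exactly representable in fp32 */
  const __m256 vminus_ln2_lo = _mm256_set1_ps(2.12194440e-4f);  /* low correction so hi + lo == -ln(2) */

  __m256 vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);  /* the pattern matched in the first result groups */
  vn = _mm256_sub_ps(vn, vmagic_bias);                    /* strip the bias to recover n as a float */
  __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vx);     /* the pattern matched in the later result groups */
  vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);
  return vt;  /* reduced argument fed to the degree-5 polynomial ("p5") */
}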

/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
avx2-p5-x88.c
74 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
75 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
76 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
77 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
78 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
79 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
80 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
81 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
82 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
83 __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
[all …]
avx2-p5-x96.c
76 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
77 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
78 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
79 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
80 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
81 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
82 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
83 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
84 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
85 __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
[all …]
avx2-p5-x80.c
72 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
73 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
74 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
75 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
76 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
77 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
78 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
79 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
80 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
81 __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80()
[all …]
avx2-p5-x72.c
70 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
71 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
72 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
73 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
74 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
75 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
76 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
77 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
78 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
105 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72()
[all …]
avx2-p5-x64.c
68 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
69 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
70 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
71 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
72 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
73 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
74 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
75 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
100 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
101 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
[all …]
/external/XNNPACK/src/f32-raddexpminusmax/gen/
avx2-p5-x96.c
74 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
75 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
76 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
77 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
78 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
79 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
80 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
81 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
82 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
83 __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
[all …]
avx2-p5-x96-acc6.c
79 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
80 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
81 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
82 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
83 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
84 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
85 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
86 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
87 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
88 __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
[all …]
avx2-p5-x96-acc2.c
75 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
76 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
77 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
78 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
79 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
80 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
81 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
82 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
83 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
84 __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
[all …]
avx2-p5-x96-acc3.c
76 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
77 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
78 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
79 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
80 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
81 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
82 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
83 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
84 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
85 __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
[all …]
avx2-p5-x80-acc2.c
71 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
72 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
73 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
74 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
75 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
76 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
77 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
78 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
79 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
80 __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2()
[all …]
avx2-p5-x80.c
70 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
71 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
72 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
73 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
74 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
75 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
76 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
77 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
78 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
79 __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80()
[all …]
avx2-p5-x72-acc3.c
70 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
71 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
72 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
73 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
74 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
75 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
76 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
77 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
78 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
105 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3()
[all …]
avx2-p5-x72.c
68 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
69 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
70 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
71 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
72 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
73 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
74 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
75 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
76 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
103 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72()
[all …]
avx2-p5-x80-acc5.c
74 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5()
75 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5()
76 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5()
77 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5()
78 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5()
79 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5()
80 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5()
81 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5()
82 __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5()
83 __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5()
[all …]
avx2-p5-x64-acc2.c
67 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
68 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
69 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
70 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
71 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
72 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
73 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
74 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
99 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
100 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
[all …]
avx2-p5-x64.c
66 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
67 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
68 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
69 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
70 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
71 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
72 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
73 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
98 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
99 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64()
[all …]
avx2-p5-x64-acc4.c
69 __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4()
70 __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4()
71 __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4()
72 __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4()
73 __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4()
74 __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4()
75 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4()
76 __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4()
101 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4()
102 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4()
[all …]
/external/XNNPACK/src/f32-raddextexp/gen/
avx2-p5-x96.c
78 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
79 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
80 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
81 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
82 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
83 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
84 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
85 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
86 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
87 __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
[all …]
avx2-p5-x80-acc2.c
76 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
77 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
78 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
79 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
80 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
81 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
82 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
83 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
84 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
85 __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
[all …]
avx2-p5-x80.c
74 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
75 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
76 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
77 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
78 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
79 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
80 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
81 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
82 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
83 __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9); in xnn_f32_raddextexp_ukernel__avx2_p5_x80()
[all …]
avx2-p5-x96-acc3.c
82 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
83 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
84 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
85 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
86 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
87 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
88 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
89 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
90 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
91 __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
[all …]
avx2-p5-x96-acc2.c
80 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
81 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
82 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
83 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
84 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
85 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
86 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
87 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
88 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
89 __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
[all …]
avx2-p5-x72.c
72 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
73 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
74 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
75 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
76 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
77 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
78 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
79 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
80 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
82 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_lo, vt0); in xnn_f32_raddextexp_ukernel__avx2_p5_x72()
[all …]
/external/XNNPACK/src/f32-vscaleextexp/gen/
avx2-p5-x96.c
80 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
81 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
82 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
83 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
84 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
85 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
86 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
87 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
88 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
89 __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
[all …]
avx2-p5-x88.c
78 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
79 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
80 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
81 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
82 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
83 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
84 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
85 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
86 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
87 __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
[all …]
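
The "p5" in these file names refers to the degree-5 polynomial approximation that follows the reduction above; evaluating it is yet another chain of _mm256_fmadd_ps calls (Horner's scheme). A hedged sketch of that shape, with the coefficients left as parameters (placeholder names vc1…vc5, not values copied from the kernels):

#include <immintrin.h>  /* AVX2 + FMA intrinsics; compile with -mavx2 -mfma */

/* Horner evaluation p(t) = (((c5*t + c4)*t + c3)*t + c2)*t + c1, then the
 * result is folded with the scale s = 2^n so that p*t*s + s ~= exp(x). */
static inline __m256 poly5_scale(__m256 vt, __m256 vs,
                                 __m256 vc1, __m256 vc2, __m256 vc3,
                                 __m256 vc4, __m256 vc5) {
  __m256 vp = _mm256_fmadd_ps(vc5, vt, vc4);  /* c5*t + c4 */
  vp = _mm256_fmadd_ps(vp, vt, vc3);          /* (...)*t + c3 */
  vp = _mm256_fmadd_ps(vp, vt, vc2);          /* (...)*t + c2 */
  vp = _mm256_fmadd_ps(vp, vt, vc1);          /* (...)*t + c1 */
  vt = _mm256_mul_ps(vt, vs);                 /* fold the 2^n scale into t */
  return _mm256_fmadd_ps(vp, vt, vs);         /* p*t*s + s */
}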
