Searched refs:vminus_ln2 (Results 1 – 25 of 278) sorted by relevance
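Every hit in this listing is the same range-reduction step: the kernels evaluate exp(x) as 2^n * exp(t), where n = round(x / ln2) and t = x - n*ln2 is recovered with one fused multiply-add against a register holding -ln2, the vminus_ln2 operand. The raddstoreexpminusmax micro-kernels apply it while accumulating sum(exp(x_i - max)) for softmax. Below is a minimal scalar sketch of that step (my illustration, not XNNPACK code; the constants and variable names only mirror the hits):

/* Range reduction for exp(x): split exp(x) = 2^n * exp(t), |t| <= ln2/2. */
#include <math.h>
#include <stdio.h>

int main(void) {
  const float vminus_ln2 = -0x1.62E43p-1f;      /* -ln(2) as a single float */
  const float vx = -3.5f;                       /* hypothetical input lane */
  const float vn = rintf(vx * 0x1.715476p+0f);  /* n = round(x * log2(e)) */
  const float vt = fmaf(vn, vminus_ln2, vx);    /* t = n*(-ln2) + x, one FMA */
  /* t is now small, so exp(t) can be approximated by a short polynomial
     (the p2/p5 suffix in the file names below is the polynomial degree). */
  printf("exp(%g) ~= %g (libm: %g)\n", vx, ldexpf(expf(vt), (int) vn), expf(vx));
  return 0;
}

The SIMD kernels in the hits perform this per lane via _mm256_fmadd_ps/_mm512_fmadd_ps, which is why vminus_ln2 appears once per unrolled row (vt0 ... vt8).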


/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
avx512f-rr1-p5-scalef-x192-acc2.c
30 const __m512 vminus_ln2 = _mm512_set1_ps(params->avx512_rr1_p5.minus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2() local
81 const __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
82 const __m512 vt1 = _mm512_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
83 const __m512 vt2 = _mm512_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
84 const __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
85 const __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
86 const __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
87 const __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
88 const __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
89 const __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
[all …]
avx512f-rr1-p5-scalef-x192.c
30 const __m512 vminus_ln2 = _mm512_set1_ps(params->avx512_rr1_p5.minus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192() local
80 const __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
81 const __m512 vt1 = _mm512_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
82 const __m512 vt2 = _mm512_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
83 const __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
84 const __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
85 const __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
86 const __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
87 const __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
88 const __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
[all …]
avx512f-rr1-p5-scalef-x192-acc3.c
30 const __m512 vminus_ln2 = _mm512_set1_ps(params->avx512_rr1_p5.minus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3() local
82 const __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
83 const __m512 vt1 = _mm512_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
84 const __m512 vt2 = _mm512_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
85 const __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
86 const __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
87 const __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
88 const __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
89 const __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
90 const __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
[all …]
avx512f-rr1-p5-scalef-x192-acc6.c
30 const __m512 vminus_ln2 = _mm512_set1_ps(params->avx512_rr1_p5.minus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6() local
85 const __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
86 const __m512 vt1 = _mm512_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
87 const __m512 vt2 = _mm512_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
88 const __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
89 const __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
90 const __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
91 const __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
92 const __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
93 const __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
[all …]
avx512f-rr1-p5-scalef-x160-acc2.c
30 const __m512 vminus_ln2 = _mm512_set1_ps(params->avx512_rr1_p5.minus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc2() local
75 const __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc2()
76 const __m512 vt1 = _mm512_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc2()
77 const __m512 vt2 = _mm512_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc2()
78 const __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc2()
79 const __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc2()
80 const __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc2()
81 const __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc2()
82 const __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc2()
83 const __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc2()
[all …]
avx512f-rr1-p5-scalef-x160.c
30 const __m512 vminus_ln2 = _mm512_set1_ps(params->avx512_rr1_p5.minus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160() local
74 const __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160()
75 const __m512 vt1 = _mm512_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160()
76 const __m512 vt2 = _mm512_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160()
77 const __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160()
78 const __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160()
79 const __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160()
80 const __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160()
81 const __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160()
82 const __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160()
[all …]
avx512f-rr1-p5-scalef-x160-acc5.c
30 const __m512 vminus_ln2 = _mm512_set1_ps(params->avx512_rr1_p5.minus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc5() local
78 const __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc5()
79 const __m512 vt1 = _mm512_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc5()
80 const __m512 vt2 = _mm512_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc5()
81 const __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc5()
82 const __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc5()
83 const __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc5()
84 const __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc5()
85 const __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc5()
86 const __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x160_acc5()
[all …]
avx512f-rr1-p5-scalef-x144.c
30 const __m512 vminus_ln2 = _mm512_set1_ps(params->avx512_rr1_p5.minus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144() local
71 const __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144()
72 const __m512 vt1 = _mm512_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144()
73 const __m512 vt2 = _mm512_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144()
74 const __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144()
75 const __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144()
76 const __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144()
77 const __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144()
78 const __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144()
79 const __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144()
[all …]
avx512f-rr1-p5-scalef-x144-acc3.c
30 const __m512 vminus_ln2 = _mm512_set1_ps(params->avx512_rr1_p5.minus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144_acc3() local
73 const __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144_acc3()
74 const __m512 vt1 = _mm512_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144_acc3()
75 const __m512 vt2 = _mm512_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144_acc3()
76 const __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144_acc3()
77 const __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144_acc3()
78 const __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144_acc3()
79 const __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144_acc3()
80 const __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144_acc3()
81 const __m512 vt8 = _mm512_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x144_acc3()
[all …]
avx512f-rr1-p5-scalef-x128-acc4.c
30 const __m512 vminus_ln2 = _mm512_set1_ps(params->avx512_rr1_p5.minus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc4() local
71 const __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc4()
72 const __m512 vt1 = _mm512_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc4()
73 const __m512 vt2 = _mm512_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc4()
74 const __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc4()
75 const __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc4()
76 const __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc4()
77 const __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc4()
78 const __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc4()
166 const __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vx); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc4()
[all …]
avx512f-rr1-p5-scalef-x128-acc2.c
30 const __m512 vminus_ln2 = _mm512_set1_ps(params->avx512_rr1_p5.minus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc2() local
69 const __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc2()
70 const __m512 vt1 = _mm512_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc2()
71 const __m512 vt2 = _mm512_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc2()
72 const __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc2()
73 const __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc2()
74 const __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc2()
75 const __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc2()
76 const __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc2()
162 const __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vx); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128_acc2()
[all …]
avx512f-rr1-p5-scalef-x128.c
30 const __m512 vminus_ln2 = _mm512_set1_ps(params->avx512_rr1_p5.minus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128() local
68 const __m512 vt0 = _mm512_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128()
69 const __m512 vt1 = _mm512_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128()
70 const __m512 vt2 = _mm512_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128()
71 const __m512 vt3 = _mm512_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128()
72 const __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128()
73 const __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128()
74 const __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128()
75 const __m512 vt7 = _mm512_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128()
160 const __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vx); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x128()
[all …]
avx2-rr1-p5-x96-acc2.c
30 const __m256 vminus_ln2 = _mm256_load_ps(params->avx2_rr1_p5.minus_ln2); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2() local
107 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
108 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
109 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
110 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
111 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
112 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
113 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
114 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
115 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
[all …]
/external/XNNPACK/src/f16-raddstoreexpminusmax/gen/
avx2-rr1-p2-x96-acc6.c
31 const __m256 vminus_ln2 = _mm256_load_ps(params->avx2_rr1_p2.minus_ln2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc6() local
111 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc6()
112 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc6()
113 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc6()
114 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc6()
115 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc6()
116 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc6()
117 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc6()
118 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc6()
119 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc6()
[all …]
avx2-rr1-p2-x96-acc3.c
31 const __m256 vminus_ln2 = _mm256_load_ps(params->avx2_rr1_p2.minus_ln2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc3() local
108 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc3()
109 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc3()
110 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc3()
111 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc3()
112 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc3()
113 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc3()
114 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc3()
115 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc3()
116 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc3()
[all …]
avx2-rr1-p2-x96-acc2.c
31 const __m256 vminus_ln2 = _mm256_load_ps(params->avx2_rr1_p2.minus_ln2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc2() local
107 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc2()
108 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc2()
109 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc2()
110 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc2()
111 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc2()
112 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc2()
113 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc2()
114 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc2()
115 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96_acc2()
[all …]
avx2-rr1-p2-x96.c
31 const __m256 vminus_ln2 = _mm256_load_ps(params->avx2_rr1_p2.minus_ln2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96() local
106 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96()
107 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96()
108 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96()
109 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96()
110 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96()
111 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96()
112 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96()
113 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96()
114 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x96()
[all …]
avx2-rr1-p2-x80.c
31 const __m256 vminus_ln2 = _mm256_load_ps(params->avx2_rr1_p2.minus_ln2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80() local
96 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80()
97 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80()
98 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80()
99 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80()
100 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80()
101 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80()
102 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80()
103 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80()
104 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80()
[all …]
avx2-rr1-p2-x80-acc2.c
31 const __m256 vminus_ln2 = _mm256_load_ps(params->avx2_rr1_p2.minus_ln2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc2() local
97 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc2()
98 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc2()
99 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc2()
100 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc2()
101 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc2()
102 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc2()
103 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc2()
104 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc2()
105 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc2()
[all …]
avx2-rr1-p2-x80-acc5.c
31 const __m256 vminus_ln2 = _mm256_load_ps(params->avx2_rr1_p2.minus_ln2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc5() local
100 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc5()
101 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc5()
102 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc5()
103 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc5()
104 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc5()
105 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc5()
106 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc5()
107 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc5()
108 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x80_acc5()
[all …]
avx2-rr1-p2-x72.c
31 const __m256 vminus_ln2 = _mm256_load_ps(params->avx2_rr1_p2.minus_ln2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72() local
91 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72()
92 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72()
93 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72()
94 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72()
95 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72()
96 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72()
97 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72()
98 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72()
99 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72()
[all …]
avx2-rr1-p2-x72-acc3.c
31 const __m256 vminus_ln2 = _mm256_load_ps(params->avx2_rr1_p2.minus_ln2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72_acc3() local
93 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72_acc3()
94 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72_acc3()
95 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72_acc3()
96 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72_acc3()
97 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72_acc3()
98 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72_acc3()
99 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72_acc3()
100 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72_acc3()
101 __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2, vx8); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x72_acc3()
[all …]
avx2-rr1-p2-x64-acc2.c
31 const __m256 vminus_ln2 = _mm256_load_ps(params->avx2_rr1_p2.minus_ln2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc2() local
87 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc2()
88 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc2()
89 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc2()
90 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc2()
91 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc2()
92 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc2()
93 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc2()
94 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc2()
166 __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2, vx); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc2()
[all …]
avx2-rr1-p2-x64-acc4.c
31 const __m256 vminus_ln2 = _mm256_load_ps(params->avx2_rr1_p2.minus_ln2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc4() local
89 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc4()
90 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc4()
91 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc4()
92 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc4()
93 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc4()
94 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc4()
95 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc4()
96 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc4()
170 __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2, vx); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64_acc4()
[all …]
avx2-rr1-p2-x64.c
31 const __m256 vminus_ln2 = _mm256_load_ps(params->avx2_rr1_p2.minus_ln2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64() local
86 __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vx0); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64()
87 __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2, vx1); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64()
88 __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2, vx2); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64()
89 __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vx3); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64()
90 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vx4); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64()
91 __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vx5); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64()
92 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vx6); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64()
93 __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2, vx7); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64()
164 __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2, vx); in xnn_f16_raddstoreexpminusmax_ukernel__avx2_rr1_p2_x64()
[all …]
