/external/XNNPACK/src/f32-raddexpminusmax/gen/

D | avx512f-p5-scalef-x192-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2():
     28  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
    100  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
    101  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
    102  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
    103  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
    104  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
    105  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    106  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    107  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
    108  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x192.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192():
     28  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     99  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
    100  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
    101  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
    102  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
    103  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
    104  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    105  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    106  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
    107  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x192-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3():
     28  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
    101  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
    102  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
    103  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
    104  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
    105  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
    106  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    107  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    108  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
    109  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x192-acc6.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6():
     28  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
    104  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
    105  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
    106  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
    107  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
    108  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
    109  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    110  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    111  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
    112  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x160-acc5.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5():
     28  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     95  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     96  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     97  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     98  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     99  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
    100  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    101  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    102  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
    103  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x160.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160():
     28  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     91  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     92  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     93  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     94  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     95  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
     96  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     97  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     98  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
     99  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x160-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2():
     28  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     92  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     93  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     94  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     95  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     96  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
     97  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     98  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     99  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
    100  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x144.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144():
     28  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     87  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     88  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     89  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     90  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     91  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
     92  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     93  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     94  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
     95  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x144-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3():
     28  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     89  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     90  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     91  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     92  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     93  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
     94  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     95  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     96  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
     97  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]
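Every match above is the same step of the exp evaluation in these generated kernels: vminus_ln2_lo (0x1.05C61p-29f) is the low half of a two-constant Cody-Waite split of -ln(2), applied as the second of two FMAs when reconstructing the reduced argument t = x - n*ln(2). A minimal scalar sketch of that reduction follows; the hi constant (-0x1.62E43p-1f) and the 1/ln(2) multiplier are assumptions based on the usual split, since only the lo constant actually appears in these matches.

    #include <math.h>

    /* Hedged scalar sketch of the argument reduction the kernels above
       vectorize and unroll. vminus_ln2_lo matches the listing; the other
       constants are assumptions based on the standard Cody-Waite split. */
    static float reduce_arg(float x) {
      const float vlog2e        = 0x1.715476p+0f;   /* assumed: 1/ln(2) */
      const float vminus_ln2_hi = -0x1.62E43p-1f;   /* assumed: hi part of -ln(2) */
      const float vminus_ln2_lo = 0x1.05C61p-29f;   /* as matched above */
      const float n = rintf(x * vlog2e);            /* n = round(x / ln(2)) */
      float t = fmaf(n, vminus_ln2_hi, x);          /* x - n*ln2_hi in one FMA */
      t = fmaf(n, vminus_ln2_lo, t);                /* the fmadd step matched above */
      return t;                                     /* |t| <= about ln(2)/2 */
    }

The lo constant is positive because the hi constant slightly overshoots ln(2) in magnitude; the second FMA adds the overshoot back, so t carries nearly double-float accuracy into the polynomial.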
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

D | avx512f-p5-scalef-x192.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192():
     29  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
    100  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
    101  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
    102  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
    103  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
    104  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
    105  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    106  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    107  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
    108  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x176.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176():
     29  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     96  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     97  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     98  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     99  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
    100  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
    101  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    102  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    103  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
    104  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x160.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160():
     29  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     92  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     93  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     94  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     95  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     96  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
     97  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     98  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     99  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
    100  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x144.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144():
     29  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     88  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     89  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     90  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     91  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     92  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
     93  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     94  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     95  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
     96  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]
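The f32-vscaleexpminusmax kernels wrap this reduction in a complete exp(x - max) evaluation: a degree-5 polynomial in t followed by _mm512_scalef_ps to apply 2^n directly in the exponent. Below is a single-vector sketch of the pattern that the x144…x192 variants unroll 9 to 12 registers wide; the polynomial coefficients are assumptions typical of p5 kernels, not values taken from this listing.

    #include <immintrin.h>

    /* Hedged sketch: one __m512 worth of exp(x), the unit these kernels
       replicate per unrolled register. Coefficients vc2..vc5 are assumed. */
    static __m512 vexp_p5(__m512 vx) {
      const __m512 vlog2e        = _mm512_set1_ps(0x1.715476p+0f);
      const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f);
      const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);
      const __m512 vc5  = _mm512_set1_ps(0x1.0F9F9Cp-7f);
      const __m512 vc4  = _mm512_set1_ps(0x1.573A1Ap-5f);
      const __m512 vc3  = _mm512_set1_ps(0x1.555A80p-3f);
      const __m512 vc2  = _mm512_set1_ps(0x1.FFFFF6p-2f);
      const __m512 vone = _mm512_set1_ps(1.0f);

      /* n = round(x / ln(2)), kept as a float vector for scalef. */
      const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0);
      /* t = x - n*ln(2) via the two-constant reduction matched in this listing. */
      __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vx);
      vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt);
      /* p(t) = 1 + t + c2*t^2 + ... + c5*t^5 by Horner's rule. */
      __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
      vp = _mm512_fmadd_ps(vp, vt, vc3);
      vp = _mm512_fmadd_ps(vp, vt, vc2);
      vp = _mm512_fmadd_ps(vp, vt, vone);
      vp = _mm512_fmadd_ps(vp, vt, vone);
      /* exp(x) = p(t) * 2^n; scalef adjusts the exponent in one instruction. */
      return _mm512_scalef_ps(vp, vn);
    }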
/external/XNNPACK/src/f32-vscaleextexp/gen/

D | avx512f-p5-scalef-x192.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192():
     30  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     87  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     88  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     89  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     90  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     91  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
     92  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     93  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     94  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
     95  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x176.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176():
     30  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     84  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     85  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     86  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     87  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     88  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
     89  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     90  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     91  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
     92  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x160.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160():
     30  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     81  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     82  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     83  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     84  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     85  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
     86  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     87  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     88  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
     89  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x144.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144():
     30  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     78  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     79  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     80  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     81  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     82  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
     83  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     84  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     85  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
     86  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]
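The f32-vscaleextexp kernels differ from the plain exp kernels in keeping each result in an extended-exponent form: a (mantissa, exponent) pair meaning m * 2^e, with e carried as a separate float vector, so scaling by extreme factors cannot overflow a single float's 8-bit exponent. A hedged sketch of how two such pairs can be added on a common exponent; the function name and exact scheme here are illustrative, not copied from the source:

    #include <immintrin.h>

    /* Illustrative: combine m1*2^e1 + m2*2^e2 into m_out*2^e_out by
       rescaling both mantissas to the larger of the two exponents. */
    static void extadd(__m512 m1, __m512 e1, __m512 m2, __m512 e2,
                       __m512* m_out, __m512* e_out) {
      const __m512 emax = _mm512_max_ps(e1, e2);
      const __m512 s1 = _mm512_scalef_ps(m1, _mm512_sub_ps(e1, emax));
      const __m512 s2 = _mm512_scalef_ps(m2, _mm512_sub_ps(e2, emax));
      *m_out = _mm512_add_ps(s1, s2);  /* mantissas now share the scale 2^emax */
      *e_out = emax;
    }

The smaller term's delta e - emax is non-positive, so its scalef only shrinks the mantissa; a term too small to matter simply underflows harmlessly.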
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

D | avx512f-p5-scalef-x192-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2():
     29  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
    101  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
    102  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
    103  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
    104  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
    105  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
    106  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    107  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    108  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
    109  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x192-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3():
     29  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
    102  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
    103  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
    104  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
    105  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
    106  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
    107  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    108  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    109  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
    110  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x192.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192():
     29  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
    100  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
    101  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
    102  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
    103  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
    104  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
    105  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    106  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    107  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
    108  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x160-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2():
     29  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     93  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     94  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     95  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     96  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     97  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
     98  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     99  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
    100  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
    101  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x160.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160():
     29  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     92  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     93  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     94  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     95  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     96  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
     97  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     98  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     99  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
    100  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x144.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144():
     29  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     88  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     89  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     90  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     91  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     92  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
     93  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     94  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     95  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
     96  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]

D | avx512f-p5-scalef-x144-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3():
     29  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     90  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     91  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     92  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     93  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     94  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
     95  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     96  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     97  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
     98  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]
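Across the reduction kernels above, the _acc2/_acc3/_acc5/_acc6 suffixes name the number of independent vector accumulators used for the running sum. Splitting the sum breaks the serial dependency chain on the accumulator register, so the unrolled loop is bound by FMA/add throughput rather than latency, and the partial sums are combined once at the end. A scalarized sketch of the idea (illustrative only):

    #include <stddef.h>

    /* Two independent partial sums stand in for the N vector accumulators
       of the _acc2 kernels; the combine happens once, after the loop. */
    static float sum_acc2(const float* f, size_t n) {
      float acc0 = 0.0f, acc1 = 0.0f;
      for (size_t i = 0; i + 1 < n; i += 2) {
        acc0 += f[i];      /* chain 0 */
        acc1 += f[i + 1];  /* chain 1, independent of chain 0 */
      }
      if (n & 1) acc0 += f[n - 1];  /* leftover element */
      return acc0 + acc1;           /* final combine */
    }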
/external/XNNPACK/src/f32-raddextexp/gen/

D | avx512f-p5-scalef-x192.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192():
     29  const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);  (local)
     87  vt0 = _mm512_fmadd_ps(vn0, vminus_ln2_lo, vt0);
     88  vt1 = _mm512_fmadd_ps(vn1, vminus_ln2_lo, vt1);
     89  vt2 = _mm512_fmadd_ps(vn2, vminus_ln2_lo, vt2);
     90  vt3 = _mm512_fmadd_ps(vn3, vminus_ln2_lo, vt3);
     91  vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_lo, vt4);
     92  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
     93  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
     94  vt7 = _mm512_fmadd_ps(vn7, vminus_ln2_lo, vt7);
     95  vt8 = _mm512_fmadd_ps(vn8, vminus_ln2_lo, vt8);
    [all …]
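As a quick sanity check on the constant all of these matches share: the assumed hi half and the matched lo half recombine to -ln(2) at double precision.

    #include <stdio.h>

    /* Illustrative check that -0x1.62E43p-1 + 0x1.05C61p-29 ~= -ln(2);
       the hi value is an assumption, only the lo value appears above. */
    int main(void) {
      const double hi = -0x1.62E43p-1;   /* assumed hi part */
      const double lo = 0x1.05C61p-29;   /* lo part, as matched above */
      printf("%.17g\n", hi + lo);        /* agrees with -ln(2) within a few ulps of float splitting */
      return 0;
    }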