/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

D | avx2-p5-x88.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88():
    74  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    75  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    76  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    77  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    78  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    79  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    80  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    81  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    82  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
    83  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
    [all …]

D | avx2-p5-x96.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96():
    76  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    77  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    78  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    79  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    80  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    81  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    82  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    83  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    84  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
    85  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
    [all …]

D | avx2-p5-x80.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80():
    72  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    73  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    74  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    75  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    76  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    77  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    78  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    79  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    80  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
    81  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
    [all …]

D | avx2-p5-x72.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72():
    70  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    71  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    72  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    73  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    74  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    75  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    76  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    77  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    78  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
    105  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);
    [all …]

D | avx2-p5-x64.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64():
    68  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    69  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    70  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    71  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    72  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    73  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    74  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    75  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    100  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);
    101  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);
    [all …]
/external/XNNPACK/src/f32-raddexpminusmax/gen/

D | avx2-p5-x96.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96():
    74  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    75  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    76  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    77  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    78  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    79  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    80  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    81  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    82  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
    83  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
    [all …]

D | avx2-p5-x96-acc6.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6():
    79  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    80  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    81  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    82  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    83  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    84  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    85  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    86  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    87  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
    88  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
    [all …]

D | avx2-p5-x96-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2():
    75  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    76  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    77  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    78  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    79  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    80  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    81  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    82  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    83  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
    84  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
    [all …]

D | avx2-p5-x96-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3():
    76  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    77  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    78  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    79  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    80  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    81  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    82  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    83  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    84  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
    85  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
    [all …]

D | avx2-p5-x80-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2():
    71  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    72  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    73  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    74  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    75  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    76  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    77  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    78  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    79  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
    80  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
    [all …]

D | avx2-p5-x80.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80():
    70  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    71  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    72  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    73  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    74  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    75  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    76  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    77  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    78  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
    79  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
    [all …]

D | avx2-p5-x72-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3():
    70  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    71  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    72  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    73  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    74  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    75  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    76  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    77  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    78  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
    105  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);
    [all …]

D | avx2-p5-x72.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72():
    68  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    69  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    70  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    71  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    72  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    73  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    74  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    75  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    76  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
    103  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);
    [all …]

D | avx2-p5-x80-acc5.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5():
    74  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    75  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    76  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    77  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    78  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    79  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    80  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    81  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    82  __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
    83  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);
    [all …]

D | avx2-p5-x64-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2():
    67  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    68  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    69  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    70  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    71  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    72  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    73  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    74  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    99  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);
    100  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);
    [all …]

D | avx2-p5-x64.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64():
    66  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    67  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    68  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    69  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    70  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    71  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    72  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    73  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    98  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);
    99  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);
    [all …]

D | avx2-p5-x64-acc4.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4():
    69  __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
    70  __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
    71  __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
    72  __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
    73  __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
    74  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
    75  __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
    76  __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
    101  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);
    102  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);
    [all …]
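Every match in the two directories above is the same first step of the AVX2 "p5" exp kernels: a single fused multiply-add that computes n = round(x * log2(e)) by adding a magic bias. The single-vector sketch below is for illustration only; the function name is invented here, and the constants are the standard values for this trick rather than necessarily the exact ones defined in the generated kernels.

#include <immintrin.h>

// Sketch of the rounding step repeated per register in the matches above:
//   vn = x * log2(e) + magic_bias
// Adding 2^23 + 2^22 pushes the fraction out of the float mantissa, so the
// addition itself rounds x*log2(e) to the nearest integer n; the extra +127
// pre-biases the result so that a later 23-bit left shift of the raw bits of
// vn yields the float 2^n directly (see the fuller sketch at the end of this
// listing). Constants here are assumptions, not the kernels' own definitions.
static inline __m256 round_step_sketch(__m256 vx) {
  const __m256 vlog2e      = _mm256_set1_ps(0x1.715476p+0f);  // log2(e)
  const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);  // 2^23 + 2^22 + 127
  return _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);
}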
/external/XNNPACK/src/f32-raddextexp/gen/

D | avx2-p5-x96.c | in xnn_f32_raddextexp_ukernel__avx2_p5_x96():
    78  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);
    79  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);
    80  __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2);
    81  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);
    82  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4);
    83  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    84  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
    85  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);
    86  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
    87  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    [all …]

D | avx2-p5-x80-acc2.c | in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2():
    76  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);
    77  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);
    78  __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2);
    79  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);
    80  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4);
    81  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    82  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
    83  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);
    84  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
    85  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    [all …]

D | avx2-p5-x80.c | in xnn_f32_raddextexp_ukernel__avx2_p5_x80():
    74  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);
    75  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);
    76  __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2);
    77  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);
    78  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4);
    79  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    80  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
    81  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);
    82  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
    83  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    [all …]

D | avx2-p5-x96-acc3.c | in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3():
    82  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);
    83  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);
    84  __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2);
    85  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);
    86  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4);
    87  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    88  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
    89  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);
    90  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
    91  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    [all …]

D | avx2-p5-x96-acc2.c | in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2():
    80  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);
    81  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);
    82  __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2);
    83  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);
    84  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4);
    85  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    86  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
    87  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);
    88  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
    89  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    [all …]

D | avx2-p5-x72.c | in xnn_f32_raddextexp_ukernel__avx2_p5_x72():
    72  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);
    73  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);
    74  __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2);
    75  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);
    76  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4);
    77  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    78  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
    79  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);
    80  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
    82  vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_lo, vt0);
    [all …]
/external/XNNPACK/src/f32-vscaleextexp/gen/

D | avx2-p5-x96.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96():
    80  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);
    81  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);
    82  __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2);
    83  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);
    84  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4);
    85  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    86  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
    87  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);
    88  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
    89  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    [all …]

D | avx2-p5-x88.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88():
    78  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2_hi, vx0);
    79  __m256 vt1 = _mm256_fmadd_ps(vn1, vminus_ln2_hi, vx1);
    80  __m256 vt2 = _mm256_fmadd_ps(vn2, vminus_ln2_hi, vx2);
    81  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2_hi, vx3);
    82  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2_hi, vx4);
    83  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    84  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
    85  __m256 vt7 = _mm256_fmadd_ps(vn7, vminus_ln2_hi, vx7);
    86  __m256 vt8 = _mm256_fmadd_ps(vn8, vminus_ln2_hi, vx8);
    87  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    [all …]
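The f32-raddextexp and f32-vscaleextexp matches above show the next step of the same scheme: reconstructing the reduced argument t = x - n*ln(2) with a hi/lo (Cody-Waite style) split of ln(2), one fused multiply-add per half, so the cancellation in the subtraction does not lose accuracy. The single-vector sketch below strings the steps together end to end and is illustrative only: the function name is invented, the ln(2) split shown is one common choice rather than the kernels' own constants, and the degree-5 polynomial uses plain Taylor coefficients instead of the tuned coefficients in the generated code.

#include <immintrin.h>

// Illustrative single-vector e^x following the reduce/approximate/reconstruct
// pattern visible in the matches above (not the XNNPACK kernels themselves).
static inline __m256 exp_p5_sketch(__m256 vx) {
  const __m256 vmagic_bias   = _mm256_set1_ps(0x1.8000FEp23f);   // 2^23 + 2^22 + 127 (assumed)
  const __m256 vlog2e        = _mm256_set1_ps(0x1.715476p+0f);   // log2(e)
  const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E400p-1f);  // leading bits of -ln(2) (assumed split)
  const __m256 vminus_ln2_lo = _mm256_set1_ps(-0x1.7F7D1Cp-20f); // remainder of -ln(2) (assumed split)

  // n = round(x * log2(e)), kept with the magic bias still added.
  const __m256 vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);
  // The low mantissa bits of vn now hold n + 127 (plus a 2^22 tag that falls
  // off the top), so one left shift by 23 rebuilds the bit pattern of 2^n.
  const __m256 vs = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn), 23));
  // Recover n as a plain float for the reduction.
  const __m256 vnf = _mm256_sub_ps(vn, vmagic_bias);

  // t = x - n*ln(2), split into hi and lo halves as in the matches above.
  __m256 vt = _mm256_fmadd_ps(vnf, vminus_ln2_hi, vx);
  vt = _mm256_fmadd_ps(vnf, vminus_ln2_lo, vt);

  // e^t on the reduced range via a degree-5 polynomial (Taylor coefficients).
  __m256 vp = _mm256_set1_ps(1.0f / 120.0f);
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(1.0f / 24.0f));
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(1.0f / 6.0f));
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(0.5f));
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(1.0f));
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(1.0f));

  // e^x = 2^n * e^t (valid while n stays in the normal exponent range).
  return _mm256_mul_ps(vs, vp);
}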