/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx2-p5-x96-acc3.c |
    87  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() local
   102  const __m256 vs11 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn11), 23));  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
   116  vn11 = _mm256_sub_ps(vn11, vmagic_bias);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
   131  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
   144  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
|
D | avx2-p5-x96-acc6.c |
    90  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() local
   105  const __m256 vs11 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn11), 23));  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
   119  vn11 = _mm256_sub_ps(vn11, vmagic_bias);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
   134  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
   147  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
|
D | avx2-p5-x96.c |
    85  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() local
   100  const __m256 vs11 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn11), 23));  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
   114  vn11 = _mm256_sub_ps(vn11, vmagic_bias);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
   129  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
   142  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
|
D | avx2-p5-x96-acc2.c |
    86  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() local
   101  const __m256 vs11 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn11), 23));  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
   115  vn11 = _mm256_sub_ps(vn11, vmagic_bias);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
   130  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
   143  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
|
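The four AVX2 matches above are the range-reduction half of XNNPACK's p5 (degree-5 polynomial) exp approximation: vn11 holds round(x * log2(e)) for the twelfth unrolled vector, obtained by adding a "magic bias" so the integer lands in the low mantissa bits; vs11 is 2**n rebuilt by shifting those bits into the exponent field; and vt11 is the reduced argument x - n*ln(2), with ln(2) split into hi/lo halves for precision. Below is a minimal single-vector sketch of the whole computation, assuming AVX2+FMA. The helper name is hypothetical, the constants are the ones these generated kernels typically use (verify against the sources), and the kernels' subtraction of the row maximum and flushing of underflowing lanes to zero are omitted.

  #include <immintrin.h>

  // Hypothetical single-vector helper; the generated x96 kernels inline this
  // logic twelve times per loop iteration (vn0..vn11) instead of calling it.
  static __m256 exp_avx2_p5(__m256 vx) {
    const __m256 vmagic_bias   = _mm256_set1_ps(0x1.8000FEp23f);
    const __m256 vlog2e        = _mm256_set1_ps(0x1.715476p+0f);
    const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f);
    const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f);
    const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f);
    const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f);
    const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f);
    const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f);
    const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f);

    // n := round(x * log2(e)); the magic bias pushes the value into the
    // range where the integer part sits in the low 23 mantissa bits.
    __m256 vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);
    // s := 2**n, built by shifting those mantissa bits into the exponent.
    const __m256 vs = _mm256_castsi256_ps(
        _mm256_slli_epi32(_mm256_castps_si256(vn), 23));
    vn = _mm256_sub_ps(vn, vmagic_bias);
    // t := x - n*ln(2), using a two-term split of ln(2) for precision.
    __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vx);
    vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);
    // Degree-5 polynomial p(t) ~= (exp(t) - 1) / t, then
    // exp(x) = s + (s*t)*p(t), ordered to avoid overflow in s*p(t).
    __m256 vp = _mm256_fmadd_ps(vc5, vt, vc4);
    vp = _mm256_fmadd_ps(vp, vt, vc3);
    vp = _mm256_fmadd_ps(vp, vt, vc2);
    vp = _mm256_fmadd_ps(vp, vt, vc1);
    vt = _mm256_mul_ps(vt, vs);
    return _mm256_fmadd_ps(vt, vp, vs);
  }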
D | avx512f-p5-scalef-x192-acc6.c |
    87  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() local
   102  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
   115  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
   197  const __m512 vf11 = _mm512_scalef_ps(vp11, vn11);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
|
D | avx512f-p5-scalef-x192-acc3.c |
    84  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local
    99  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
   112  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
   194  const __m512 vf11 = _mm512_scalef_ps(vp11, vn11);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx512f-p5-scalef-x192.c |
    82  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() local
    97  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
   110  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
   192  const __m512 vf11 = _mm512_scalef_ps(vp11, vn11);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
D | avx512f-p5-scalef-x192-acc2.c |
    83  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local
    98  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
   111  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
   193  const __m512 vf11 = _mm512_scalef_ps(vp11, vn11);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
|
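The avx512f-p5-scalef matches replace both halves of that trick with dedicated instructions: _mm512_roundscale_ps(x * log2e, 0) rounds to the nearest integer directly (imm8 = 0 selects zero fraction bits with round-to-nearest-even), and _mm512_scalef_ps(p, n) applies the 2**n scaling exactly at the end, with hardware handling of overflow and underflow, so no magic bias, mantissa shift, or denormal cutoff is needed. A matching sketch follows, with the same caveats as above (hypothetical helper name, constants to be checked against the sources); here the polynomial runs down to c0 = 1 because the scaling is deferred to scalef.

  #include <immintrin.h>

  // Hypothetical single-vector helper mirroring the avx512f-p5-scalef kernels.
  static __m512 exp_avx512_p5_scalef(__m512 vx) {
    const __m512 vlog2e        = _mm512_set1_ps(0x1.715476p+0f);
    const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f);
    const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);
    const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f);
    const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);
    const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);
    const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f);
    const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f);
    const __m512 vc0 = _mm512_set1_ps(1.0f);

    // n := round-to-nearest(x * log2(e)); no magic-bias trick needed.
    const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0);
    // t := x - n*ln(2), same two-term reduction as the AVX2 path.
    __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vx);
    vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt);
    // Full degree-5 polynomial for exp(t) itself, down to c0 = 1.
    __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
    vp = _mm512_fmadd_ps(vp, vt, vc3);
    vp = _mm512_fmadd_ps(vp, vt, vc2);
    vp = _mm512_fmadd_ps(vp, vt, vc1);
    vp = _mm512_fmadd_ps(vp, vt, vc0);
    // exp(x) = p(t) * 2**n, applied exactly by scalef.
    return _mm512_scalef_ps(vp, vn);
  }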
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx2-p5-x96-acc2.c |
    87  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() local
   102  const __m256 vs11 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn11), 23));  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
   116  vn11 = _mm256_sub_ps(vn11, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
   131  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
   144  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
|
D | avx2-p5-x96-acc3.c |
    88  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3() local
   103  const __m256 vs11 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn11), 23));  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
   117  vn11 = _mm256_sub_ps(vn11, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
   132  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
   145  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
|
D | avx2-p5-x96-acc6.c |
    91  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() local
   106  const __m256 vs11 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn11), 23));  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
   120  vn11 = _mm256_sub_ps(vn11, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
   135  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
   148  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
|
D | avx2-p5-x96.c |
    86  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() local
   101  const __m256 vs11 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn11), 23));  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
   115  vn11 = _mm256_sub_ps(vn11, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
   130  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
   143  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
|
D | avx512f-p5-scalef-x192.c |
    83  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() local
    98  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
   111  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
   193  const __m512 vf11 = _mm512_scalef_ps(vp11, vn11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
D | avx512f-p5-scalef-x192-acc3.c |
    85  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local
   100  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
   113  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
   195  const __m512 vf11 = _mm512_scalef_ps(vp11, vn11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx512f-p5-scalef-x192-acc2.c |
    84  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local
    99  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
   112  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
   194  const __m512 vf11 = _mm512_scalef_ps(vp11, vn11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx512f-p5-scalef-x192-acc6.c |
    88  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() local
   103  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
   116  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
   198  const __m512 vf11 = _mm512_scalef_ps(vp11, vn11);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
|
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x96.c |
    87  __m256 vn11 = _mm256_fmadd_ps(vx11, vlog2e, vmagic_bias);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() local
   102  const __m256 vs11 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn11), 23));  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
   116  vn11 = _mm256_sub_ps(vn11, vmagic_bias);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
   131  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
   144  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
|
D | avx512f-p5-scalef-x192.c |
    83  __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() local
    98  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
   111  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
   193  __m512 vf11 = _mm512_scalef_ps(vp11, vn11);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
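Note that the f32-raddstoreexpminusmax and f32-vscaleexpminusmax matches above repeat the same two reductions line for line (these kernels are evidently emitted from a shared template) and differ only after the matched lines: the raddstore* variants also write each exp(x - max) back to memory, while the vscale* variants multiply by a caller-supplied scale instead of only accumulating. The two sketches above therefore cover them as well.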
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x192.c |
    70  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() local
    85  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
    98  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
   187  vmax_e0 = _mm512_max_ps(vmax_e0, vn11);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
   201  const __m512 vdelta_e11 = _mm512_sub_ps(vn11, vmax_e0);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
|
D | avx512f-p5-scalef-x192-acc2.c |
    72  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() local
    87  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
   100  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
   189  vmax_e1 = _mm512_max_ps(vmax_e1, vn11);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
   204  const __m512 vdelta_e11 = _mm512_sub_ps(vn11, vmax_e1);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx512f-p5-scalef-x192-acc3.c |
    74  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() local
    89  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
   102  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
   191  vmax_e2 = _mm512_max_ps(vmax_e2, vn11);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
   207  const __m512 vdelta_e11 = _mm512_sub_ps(vn11, vmax_e2);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx512f-p5-scalef-x192-acc6.c |
    80  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() local
    95  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
   108  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
   197  vmax_e5 = _mm512_max_ps(vmax_e5, vn11);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
   216  const __m512 vdelta_e11 = _mm512_sub_ps(vn11, vmax_e5);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
|
D | avx2-p5-x96.c |
    74  …const __m256 vn11 = _mm256_round_ps(_mm256_mul_ps(vx11, vlog2e), _MM_FROUND_TO_NEAREST_INT | _MM_F…  in xnn_f32_raddextexp_ukernel__avx2_p5_x96() local
    89  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
   102  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
   191  vmax_e0 = _mm256_max_ps(vmax_e0, vn11);  in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
   209  const __m256 vdelta_e11 = _mm256_max_ps(_mm256_sub_ps(vn11, vmax_e0), vmin_exponent);  in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
|
D | avx2-p5-x96-acc2.c |
    76  …const __m256 vn11 = _mm256_round_ps(_mm256_mul_ps(vx11, vlog2e), _MM_FROUND_TO_NEAREST_INT | _MM_F…  in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() local
    91  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
   104  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
   193  vmax_e1 = _mm256_max_ps(vmax_e1, vn11);  in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
   212  const __m256 vdelta_e11 = _mm256_max_ps(_mm256_sub_ps(vn11, vmax_e1), vmin_exponent);  in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
|
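The f32-raddextexp kernels never collapse exp(x) into a single float: each term stays as a mantissa/exponent pair (vp11, vn11) with exp(x11) ~= vp11 * 2**vn11, and the running sum is likewise a pair, so the reduction cannot overflow or underflow regardless of input range. The vmax_e and vdelta_e matches above are the renormalization step: take the largest exponent seen so far, express every operand relative to it, and add mantissas. Below is a condensed one-step sketch under those assumptions; the real kernels first max vn across all twelve unrolled vectors and keep one to six independent accumulators, and all names here are illustrative rather than the kernels' API.

  #include <immintrin.h>

  // One extended-exponent accumulation step: (*vacc, *vacc_e) += vp * 2**vn.
  static void raddextexp_step(__m512 *vacc, __m512 *vacc_e,
                              __m512 vp, __m512 vn) {
    // New common exponent is the larger of the two.
    const __m512 vmax_e = _mm512_max_ps(*vacc_e, vn);
    // Both deltas are <= 0, so scalef only ever scales mantissas down.
    const __m512 vdelta_acc_e = _mm512_sub_ps(*vacc_e, vmax_e);
    const __m512 vdelta_e     = _mm512_sub_ps(vn, vmax_e);
    // Rescale both operands to the common exponent and add mantissas.
    *vacc = _mm512_add_ps(_mm512_scalef_ps(*vacc, vdelta_acc_e),
                          _mm512_scalef_ps(vp, vdelta_e));
    *vacc_e = vmax_e;
  }

The AVX2 variants (avx2-p5-x96*) lack scalef, so they rebuild 2**delta through the exponent-field trick; that is presumably why their vdelta_e11 is additionally clamped at vmin_exponent, since a too-negative delta cannot be represented in the biased exponent and the clamp reduces such terms to a negligible contribution instead.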
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x192.c |
    70  const __m512 vn11 = _mm512_roundscale_ps(_mm512_mul_ps(vx11, vlog2e), 0);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() local
    85  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
    98  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
   196  const __m512 ve11 = _mm512_add_ps(vn11, vscalee);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
|
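f32-vscaleextexp consumes the same mantissa/exponent representation for the scale factor itself: ve11 = vn11 + vscalee combines the term's exponent with the scale's exponent, and a final scalef applies the summed power of two. A sketch of that last step follows, with illustrative names; the mantissa multiply by vscalev is an assumption about the surrounding code, not visible in the matches above.

  #include <immintrin.h>

  // Apply an extended-exponent scale (vscalev * 2**vscalee) to one term.
  static __m512 vscaleextexp_step(__m512 vp, __m512 vn,
                                  __m512 vscalev, __m512 vscalee) {
    const __m512 ve = _mm512_add_ps(vn, vscalee);  // combined exponent
    return _mm512_scalef_ps(_mm512_mul_ps(vp, vscalev), ve);
  }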