/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x48.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48():
     69  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     78  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
     86  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
     95  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    102  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx2-p5-x56.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56():
     71  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     81  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
     90  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    100  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    108  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx2-p5-x64.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64():
     73  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     84  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
     94  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    105  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    114  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx2-p5-x72.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72():
     75  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     87  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
     98  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    110  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    120  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
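All of the avx2-p5 kernels above share the same exp core around vn5, visible in the five hits per file: round x*log2e with the magic-bias trick, rebuild 2^n by shifting the quotient into the float exponent field, then reduce with a Cody-Waite split of ln2 (vminus_ln2_hi/vminus_ln2_lo) before a degree-5 polynomial. A minimal single-vector sketch of that core follows; the helper name is invented here and the constants are the usual Cephes-style values from memory, not copied from the generated files.

    /* Compile with -mavx2 -mfma. A sketch, not the XNNPACK implementation. */
    #include <immintrin.h>

    static __m256 exp_avx2_p5(__m256 vx) {
      /* Adding the magic bias rounds x*log2e and parks the integer n in the
         low 23 bits of the float representation. */
      const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);
      const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f);
      /* ln(2) split into a high part exact in float and a low correction. */
      const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f);
      const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f);
      const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f);
      const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f);
      const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f);
      const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f);
      const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f);

      /* n := round(x * log2e), encoded via the magic bias */
      __m256 vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);
      /* s := 2^n, built by shifting n into the float exponent field */
      const __m256 vs = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn), 23));
      /* recover n as a plain float */
      vn = _mm256_sub_ps(vn, vmagic_bias);
      /* t := x - n*ln2, in two steps for extra precision */
      __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vx);
      vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);
      /* degree-5 polynomial p(t), so that exp(t) ~ 1 + t*p(t) */
      __m256 vp = _mm256_fmadd_ps(vc5, vt, vc4);
      vp = _mm256_fmadd_ps(vp, vt, vc3);
      vp = _mm256_fmadd_ps(vp, vt, vc2);
      vp = _mm256_fmadd_ps(vp, vt, vc1);
      vt = _mm256_mul_ps(vt, vs);
      /* exp(x) ~ s + (s*t)*p(t) */
      return _mm256_fmadd_ps(vt, vp, vs);
    }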
D | avx512f-p5-scalef-x96.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96():
     65  __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);  [local]
     74  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vx5);
     81  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    127  __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
|
D | avx2-p5-x80.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80():
     77  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     90  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
    102  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    115  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    126  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx512f-p5-scalef-x112.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112():
     67  __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);  [local]
     77  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vx5);
     85  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    137  __m512 vf5 = _mm512_scalef_ps(vp5, vn5);
|
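The interleaved avx512f-p5-scalef kernels drop the magic-bias and bit-shift games entirely: _mm512_roundscale_ps(…, 0) rounds x*log2e to the nearest integer directly, and _mm512_scalef_ps(p, n) performs the final multiply by 2^n in one instruction, which is why vn5 stays a plain float the whole way. A hedged sketch with the same illustrative constants:

    /* Compile with -mavx512f. A sketch, not the XNNPACK implementation. */
    #include <immintrin.h>

    static __m512 exp_avx512_p5_scalef(__m512 vx) {
      const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f);
      const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f);
      const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);
      const __m512 vc0 = _mm512_set1_ps(1.0f);
      const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f);
      const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f);
      const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);
      const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);
      const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f);

      /* n := round(x * log2e); no magic bias needed on AVX512 */
      const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0);
      /* t := x - n*ln2, hi/lo split as in the AVX2 version */
      __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vx);
      vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt);
      /* p(t) ~ exp(t) on the reduced range, constant term included */
      __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
      vp = _mm512_fmadd_ps(vp, vt, vc3);
      vp = _mm512_fmadd_ps(vp, vt, vc2);
      vp = _mm512_fmadd_ps(vp, vt, vc1);
      vp = _mm512_fmadd_ps(vp, vt, vc0);
      /* exp(x) = p(t) * 2^n in a single scalef */
      return _mm512_scalef_ps(vp, vn);
    }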
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx2-p5-x64.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64():
     71  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     82  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
     92  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    103  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    112  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx2-p5-x64-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2():
     72  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     83  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
     93  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    104  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    113  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx2-p5-x64-acc4.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4():
     74  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     85  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
     95  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    106  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    115  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx2-p5-x72.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72():
     73  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     85  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
     96  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    108  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    118  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx2-p5-x72-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3():
     75  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     87  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
     98  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    110  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    120  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx2-p5-x80.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80():
     75  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     88  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
    100  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    113  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    124  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx2-p5-x80-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2():
     76  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     89  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
    101  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    114  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    125  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
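The raddexpminusmax kernels compute the softmax denominator sum(exp(x[i] - max)); the -acc2/-acc3/-acc4 suffixes name the number of independent accumulators used to break the floating-point add latency chain. A hedged sketch of the acc2 pattern, reusing the hypothetical exp_avx2_p5() helper from the first sketch; tail handling for n not a multiple of 16 is omitted:

    #include <immintrin.h>
    #include <stddef.h>

    static __m256 exp_avx2_p5(__m256 vx);  /* hypothetical helper, sketched above */

    static float raddexpminusmax_acc2(const float* x, size_t n, float max) {
      const __m256 vmax = _mm256_set1_ps(max);
      __m256 vacc0 = _mm256_setzero_ps();
      __m256 vacc1 = _mm256_setzero_ps();
      size_t i = 0;
      /* two vectors per iteration, one per accumulator, so consecutive
         adds do not depend on each other */
      for (; i + 16 <= n; i += 16) {
        vacc0 = _mm256_add_ps(vacc0, exp_avx2_p5(_mm256_sub_ps(_mm256_loadu_ps(x + i), vmax)));
        vacc1 = _mm256_add_ps(vacc1, exp_avx2_p5(_mm256_sub_ps(_mm256_loadu_ps(x + i + 8), vmax)));
      }
      /* merge the accumulators, then reduce the 8 lanes horizontally */
      const __m256 vacc = _mm256_add_ps(vacc0, vacc1);
      __m128 vsum = _mm_add_ps(_mm256_castps256_ps128(vacc), _mm256_extractf128_ps(vacc, 1));
      vsum = _mm_add_ps(vsum, _mm_movehl_ps(vsum, vsum));
      vsum = _mm_add_ss(vsum, _mm_movehdup_ps(vsum));
      return _mm_cvtss_f32(vsum);
    }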
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx2-p5-x64-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2():
     73  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     84  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
     94  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    105  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    114  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx2-p5-x64.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64():
     72  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     83  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
     93  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    104  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    113  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx2-p5-x64-acc4.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc4():
     75  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     86  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
     96  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    107  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    116  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx2-p5-x72.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72():
     74  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     86  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
     97  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    109  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    119  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
D | avx2-p5-x72-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x72_acc3():
     76  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
     88  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
     99  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    111  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
    121  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
|
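The raddstoreexpminusmax variants differ from raddexpminusmax only in that each computed exp value is also written back to memory, so the softmax numerators come out of the same pass that produces the denominator. A hedged sketch of one step, again assuming the hypothetical exp_avx2_p5() helper:

    #include <immintrin.h>

    static __m256 exp_avx2_p5(__m256 vx);  /* hypothetical helper, sketched above */

    /* one raddstore step: write exp(x-max) to y and return the updated
       accumulator */
    static __m256 raddstore_step(const float* x, float* y, __m256 vmax, __m256 vacc) {
      const __m256 vf = exp_avx2_p5(_mm256_sub_ps(_mm256_loadu_ps(x), vmax));
      _mm256_storeu_ps(y, vf);          /* numerator for the later softmax divide */
      return _mm256_add_ps(vacc, vf);   /* running denominator */
    }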
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x128.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128():
     60  const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);  [local]
     71  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vx5);
     80  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    145  vmax_e0 = _mm512_max_ps(vmax_e0, vn5);
    155  const __m512 vdelta_e5 = _mm512_sub_ps(vn5, vmax_e0);
|
D | avx512f-p5-scalef-x128-acc2.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2():
     62  const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);  [local]
     73  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vx5);
     82  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    147  vmax_e1 = _mm512_max_ps(vmax_e1, vn5);
    158  const __m512 vdelta_e5 = _mm512_sub_ps(vn5, vmax_e1);
|
D | avx512f-p5-scalef-x144.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144():
     61  const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);  [local]
     73  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vx5);
     83  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    154  vmax_e0 = _mm512_max_ps(vmax_e0, vn5);
    165  const __m512 vdelta_e5 = _mm512_sub_ps(vn5, vmax_e0);
|
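The raddextexp kernels never collapse to a plain float sum: each partial result is carried as a (mantissa, exponent) pair, with vn5 as the exponent. The vmax_e/vdelta_e hits above are the renormalization step: the running maximum exponent is updated, and both the accumulator and the new term are rescaled to it with _mm512_scalef_ps, so the sum neither overflows nor flushes to zero whatever the input range. A hedged single-step sketch (names and constants illustrative):

    #include <immintrin.h>

    /* one extended-exponent accumulation step: (*vaccv, *vacce) is the
       running sum represented as mantissa * 2^exponent, updated in place */
    static void raddextexp_step(__m512 vx, __m512* vaccv, __m512* vacce) {
      const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f);
      const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f);
      const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f);
      const __m512 vc0 = _mm512_set1_ps(1.0f);
      const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f);
      const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f);
      const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);
      const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);
      const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f);

      /* exponent part: n := round(x * log2e) */
      const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0);
      /* mantissa part: p(t) ~ exp(t) with t := x - n*ln2 */
      __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2_hi, vx);
      vt = _mm512_fmadd_ps(vn, vminus_ln2_lo, vt);
      __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
      vp = _mm512_fmadd_ps(vp, vt, vc3);
      vp = _mm512_fmadd_ps(vp, vt, vc2);
      vp = _mm512_fmadd_ps(vp, vt, vc1);
      vp = _mm512_fmadd_ps(vp, vt, vc0);

      /* renormalize accumulator and new term to the new maximum exponent */
      const __m512 vmax_e = _mm512_max_ps(*vacce, vn);
      const __m512 vdelta_acc_e = _mm512_sub_ps(*vacce, vmax_e);
      const __m512 vdelta_e = _mm512_sub_ps(vn, vmax_e);
      *vaccv = _mm512_add_ps(_mm512_scalef_ps(*vaccv, vdelta_acc_e),
                             _mm512_scalef_ps(vp, vdelta_e));
      *vacce = vmax_e;
    }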
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x96.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96():
     58  const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);  [local]
     67  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vx5);
     74  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    130  const __m512 ve5 = _mm512_add_ps(vn5, vscalee);
|
D | avx512f-p5-scalef-x112.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112():
     59  const __m512 vn5 = _mm512_roundscale_ps(_mm512_mul_ps(vx5, vlog2e), 0);  [local]
     69  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vx5);
     77  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
    140  const __m512 ve5 = _mm512_add_ps(vn5, vscalee);
|
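vscaleextexp applies an externally supplied scale that is itself kept in extended form (vscalev mantissa, vscalee exponent), which is what the ve5 = vn5 + vscalee hits show: exponents add while mantissas multiply, and a final scalef folds both back into ordinary floats. A hedged fragment of that last step, assuming vp and vn from an exp core like the scalef sketch above:

    /* combine: exp(x) * scale = (p * scalev) * 2^(n + scalee) */
    const __m512 ve = _mm512_add_ps(vn, vscalee);   /* exponents add */
    const __m512 vf = _mm512_scalef_ps(_mm512_mul_ps(vp, vscalev), ve);
    _mm512_storeu_ps(y, vf);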
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx2-rr1-p5-div-x48.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x48():
     77  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);  [local]
     86  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
     94  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    102  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
|
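The sigmoid kernel reuses the same exp machinery on vz5 rather than vx5: rr1 means a single-constant ln2 reduction (one fmadd with vminus_ln2 instead of the hi/lo pair), and the kernel evaluates exp on z = -|x| so the exponential never overflows, finishing with one division. A hedged single-vector sketch (constants illustrative):

    /* Compile with -mavx2 -mfma. A sketch, not the XNNPACK implementation. */
    #include <immintrin.h>

    static __m256 sigmoid_avx2_rr1_p5_div(__m256 vx) {
      const __m256 vsign_mask = _mm256_set1_ps(-0.0f);
      const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);
      const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f);
      const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f);
      const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f);
      const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f);
      const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f);
      const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f);
      const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f);
      const __m256 vone = _mm256_set1_ps(1.0f);

      /* z := -|x|, so exp(z) <= 1 and cannot overflow */
      const __m256 vz = _mm256_or_ps(vx, vsign_mask);
      __m256 vn = _mm256_fmadd_ps(vz, vlog2e, vmagic_bias);
      const __m256 vs = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn), 23));
      vn = _mm256_sub_ps(vn, vmagic_bias);
      /* rr1: single-step reduction t := z - n*ln2 */
      __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2, vz);
      __m256 vp = _mm256_fmadd_ps(vc5, vt, vc4);
      vp = _mm256_fmadd_ps(vp, vt, vc3);
      vp = _mm256_fmadd_ps(vp, vt, vc2);
      vp = _mm256_fmadd_ps(vp, vt, vc1);
      vt = _mm256_mul_ps(vt, vs);
      const __m256 ve = _mm256_fmadd_ps(vt, vp, vs);  /* e = exp(-|x|) */
      const __m256 vd = _mm256_add_ps(ve, vone);      /* d = 1 + e */
      const __m256 vf = _mm256_div_ps(ve, vd);        /* sigmoid(-|x|) */
      /* for x >= 0, sigmoid(x) = 1 - sigmoid(-x); blend on the sign of x */
      return _mm256_blendv_ps(_mm256_sub_ps(vone, vf), vf, vx);
    }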