/external/XNNPACK/src/f32-raddexpminusmax/gen/

D | avx2-p5-x80-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2():
     80  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
     93  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    105  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    118  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    129  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
D | avx2-p5-x80.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80():
     79  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
     92  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    104  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    117  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    128  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
D | avx2-p5-x80-acc5.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5():
     83  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
     96  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    108  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    121  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    132  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
D | avx2-p5-x96.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96():
     83  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
     98  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    112  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    127  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    140  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
D | avx2-p5-x96-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2():
     84  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
     99  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    113  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    128  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    141  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
D | avx2-p5-x96-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3():
     85  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
    100  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    114  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    129  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    142  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
D | avx2-p5-x96-acc6.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6():
     88  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
    103  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    117  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    132  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    145  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
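Every hit above is the same five statements, unrolled: vn9/vt9 belong to the tenth 8-float block (x80/x96 count elements per loop pass, accN the number of partial sums). A minimal sketch of that shared exp(x - max) core for one block, assuming the standard reduction constants and plain Taylor coefficients in place of the tuned degree-5 minimax polynomial in the generated files:

    /* compile with -mavx2 -mfma */
    #include <immintrin.h>

    static __m256 exp_block_avx2(__m256 vx) {
      /* 1.5 * 2**23 + 127: rounds n into the low mantissa bits and
         pre-adds the IEEE exponent bias for the shift below. */
      const __m256 vmagic_bias   = _mm256_set1_ps(0x1.8000FEp23f);
      const __m256 vlog2e        = _mm256_set1_ps(0x1.715476p+0f);
      const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f);
      const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f);

      /* n := round(x * log2e), parked in the float's low bits. */
      __m256 vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);
      /* s := 2**n, by shifting n's integer bits into the exponent field. */
      const __m256 vs = _mm256_castsi256_ps(
          _mm256_slli_epi32(_mm256_castps_si256(vn), 23));
      vn = _mm256_sub_ps(vn, vmagic_bias);
      /* t := x - n*ln2, with ln2 split hi/lo (Cody-Waite) to limit rounding. */
      __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vx);
      vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);
      /* p(t) ~= exp(t) on |t| <= ln2/2; Taylor here, minimax in the kernels. */
      __m256 vp = _mm256_set1_ps(1.0f / 120.0f);
      vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(1.0f / 24.0f));
      vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(1.0f / 6.0f));
      vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(0.5f));
      vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(1.0f));
      vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(1.0f));
      /* exp(x) = s * p(t). */
      return _mm256_mul_ps(vs, vp);
    }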
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

D | avx2-p5-x80.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80():
     81  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
     94  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    106  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    119  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    130  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
D | avx2-p5-x88.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88():
     83  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
     97  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    110  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    124  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    136  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
D | avx2-p5-x96.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96():
     85  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
    100  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    114  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    129  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    142  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
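The f32-vscaleexpminusmax hits are the identical core again; the families differ only in what happens to exp(x - max) afterwards. A hedged scalar model of the semantics the kernel names suggest (reference function names hypothetical):

    #include <math.h>
    #include <stddef.h>

    /* f32-raddexpminusmax: reduce-add exp(x[i] - max) to a scalar sum. */
    static float raddexpminusmax_ref(const float* x, size_t n, float max) {
      float sum = 0.0f;
      for (size_t i = 0; i < n; i++) {
        sum += expf(x[i] - max);
      }
      return sum;
    }

    /* f32-vscaleexpminusmax: store scale * exp(x[i] - max) per element. */
    static void vscaleexpminusmax_ref(const float* x, float* y, size_t n,
                                      float max, float scale) {
      for (size_t i = 0; i < n; i++) {
        y[i] = scale * expf(x[i] - max);
      }
    }

Subtracting the running max first keeps expf in range; together these two passes are the building blocks of a numerically safe softmax.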
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

D | avx2-p5-x80.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80():
     80  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
     93  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    105  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    118  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    129  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
D | avx2-p5-x80-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5():
     84  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
     97  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    109  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    122  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    133  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
D | avx2-p5-x80-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc2():
     81  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
     94  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    106  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    119  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    130  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
D | avx2-p5-x96-acc6.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6():
     89  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
    104  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    118  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    133  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    146  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
D | avx2-p5-x96-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3():
     86  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
    101  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    115  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    130  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    143  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
D | avx2-p5-x96-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2():
     85  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
    100  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    114  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    129  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    142  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
D | avx2-p5-x96.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96():
     84  __m256 vn9 = _mm256_fmadd_ps(vx9, vlog2e, vmagic_bias);  (local)
     99  const __m256 vs9 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn9), 23));
    113  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    128  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_hi, vx9);
    141  vt9 = _mm256_fmadd_ps(vn9, vminus_ln2_lo, vt9);
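f32-raddstoreexpminusmax does both at once: it stores each exp(x - max) and accumulates their sum, and the -accN suffixes keep N independent partial sums so consecutive adds need not serialize on one register (long single-chain sums also accumulate more rounding error). A hedged scalar sketch of the acc2 idea, function name hypothetical:

    #include <math.h>
    #include <stddef.h>

    static float raddstoreexpminusmax_acc2_ref(const float* x, float* y,
                                               size_t n, float max) {
      float vacc0 = 0.0f, vacc1 = 0.0f;  /* two independent partial sums */
      size_t i = 0;
      for (; i + 2 <= n; i += 2) {
        const float f0 = expf(x[i + 0] - max);
        const float f1 = expf(x[i + 1] - max);
        y[i + 0] = f0;   /* the "store" half */
        y[i + 1] = f1;
        vacc0 += f0;     /* the "radd" half, split across accumulators */
        vacc1 += f1;
      }
      for (; i < n; i++) {
        const float f = expf(x[i] - max);
        y[i] = f;
        vacc0 += f;
      }
      return vacc0 + vacc1;  /* fold the partial sums at the end */
    }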
/external/XNNPACK/src/f32-raddextexp/gen/

D | avx512f-p5-scalef-x160.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160():
     66  const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);  (local)
     79  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);
     90  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
    167  vmax_e0 = _mm512_max_ps(vmax_e0, vn9);
    179  const __m512 vdelta_e9 = _mm512_sub_ps(vn9, vmax_e0);
D | avx512f-p5-scalef-x160-acc2.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2():
     68  const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);  (local)
     81  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);
     92  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
    169  vmax_e1 = _mm512_max_ps(vmax_e1, vn9);
    182  const __m512 vdelta_e9 = _mm512_sub_ps(vn9, vmax_e1);
D | avx512f-p5-scalef-x192.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192():
     68  const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);  (local)
     83  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);
     96  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
    185  vmax_e0 = _mm512_max_ps(vmax_e0, vn9);
    199  const __m512 vdelta_e9 = _mm512_sub_ps(vn9, vmax_e0);
D | avx512f-p5-scalef-x192-acc2.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2():
     70  const __m512 vn9 = _mm512_roundscale_ps(_mm512_mul_ps(vx9, vlog2e), 0);  (local)
     85  __m512 vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_hi, vx9);
     98  vt9 = _mm512_fmadd_ps(vn9, vminus_ln2_lo, vt9);
    187  vmax_e1 = _mm512_max_ps(vmax_e1, vn9);
    202  const __m512 vdelta_e9 = _mm512_sub_ps(vn9, vmax_e1);
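The raddextexp kernels skip the separate max pass entirely: vn9 is an exact integer from _mm512_roundscale_ps and is carried alongside the mantissa as an extended exponent instead of being spliced into float bits, and the vmax_e*/vdelta_e9 lines renormalize everything onto a common exponent before adding. A sketch of one such accumulation step, assuming the usual (mantissa, exponent) pair representation; the _mm512_scalef_ps combine is an assumption, as it does not appear in the fragments above:

    /* compile with -mavx512f */
    #include <immintrin.h>

    /* Running sum state: value ~= vaccv * 2**vacce, lane-wise. */
    static void accumulate_extexp(__m512* vaccv, __m512* vacce,
                                  __m512 vp /* p(t) for one block */,
                                  __m512 vn /* that block's integer exponent */) {
      /* Common exponent: max of the running exponent and this block's n. */
      const __m512 vmax_e = _mm512_max_ps(*vacce, vn);
      /* Rescale both addends onto it; scalef computes x * 2**y, exactly
         for integer y, so an overflow-prone 2**n is never materialized. */
      const __m512 vdelta_acce = _mm512_sub_ps(*vacce, vmax_e);
      const __m512 vdelta_e    = _mm512_sub_ps(vn, vmax_e);
      *vaccv = _mm512_scalef_ps(*vaccv, vdelta_acce);
      *vaccv = _mm512_add_ps(*vaccv, _mm512_scalef_ps(vp, vdelta_e));
      *vacce = vmax_e;
    }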
/external/XNNPACK/src/f32-velu/gen/

D | velu-avx2-rr1-lut8-p4-perm-x80.c | in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80():
     75  __m256 vn9 = _mm256_fmadd_ps(vz9, vlog2e, vmagic_bias);  (local)
    104  const __m256i ven9 = _mm256_slli_epi32(_mm256_castps_si256(vn9), 20);
    105  const __m256i vl9 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn9));
    106  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    127  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2, vz9);
D | velu-avx2-rr1-lut4-p4-perm-x80.c | in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80():
     76  __m256 vn9 = _mm256_fmadd_ps(vz9, vlog2e, vmagic_bias);  (local)
    105  const __m256i ven9 = _mm256_slli_epi32(_mm256_castps_si256(vn9), 21);
    106  const __m256i vl9 = _mm256_castps_si256(_mm256_permutevar_ps(vtable, _mm256_castps_si256(vn9)));
    107  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    128  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2, vz9);
D | velu-avx2-rr1-lut16-p3-gather-x80.c | in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80():
     75  __m256 vn9 = _mm256_fmadd_ps(vz9, vlog2e, vmagic_bias);  (local)
     95  const __m256i vidx9 = _mm256_and_si256(_mm256_castps_si256(vn9), vindex_mask);
    116  const __m256i ven9 = _mm256_slli_epi32(_mm256_castps_si256(vn9), 19);
    117  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    138  __m256 vt9 = _mm256_fmadd_ps(vn9, vminus_ln2, vz9);
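The velu hits trade polynomial degree for a small lookup table: the low bits of vn9 select a stored 2**(i/k) value (_mm256_permutevar8x32_epi32 for 8 entries, _mm256_permutevar_ps for 4, a gather through vidx9 for 16) while the remaining bits are shifted past the table index into the exponent field, hence the 20-, 21- and 19-bit shifts (23 minus 3, 2 and 4 index bits). A hedged scalar model of the 8-entry case:

    #include <math.h>
    #include <stdint.h>

    /* 2**(n8/8) for integer n8, mirroring the lut8 split: three low bits
       index a table of 2**(i/8), the remaining bits become the exponent. */
    static float exp2_lut8_ref(int32_t n8) {
      static const float table[8] = {
        0x1.000000p+0f, 0x1.172B84p+0f, 0x1.306FE0p+0f, 0x1.4BFDAEp+0f,
        0x1.6A09E6p+0f, 0x1.8ACE54p+0f, 0x1.AE89FAp+0f, 0x1.D5818Ep+0f,
      };
      const int32_t idx = n8 & 7;        /* fractional eighths */
      const int32_t e = (n8 - idx) / 8;  /* integer part */
      return ldexpf(table[idx], e);      /* the kernels splice e into the
                                            float's exponent bits instead */
    }

With only 1/8th of an octave left between table nodes, a degree-4 (or, for lut16, degree-3) polynomial suffices where the p5 kernels need degree 5.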
/external/XNNPACK/src/f32-sigmoid/gen/

D | avx-rr2-p5-div-x80.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80():
     74  __m256 vn9 = _mm256_add_ps(_mm256_mul_ps(vz9, vlog2e), vmagic_bias);  (local)
    103  const __m128 vs_lo9 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn9)), 23));
    104  const __m128 vs_hi9 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn9, 1)), 23));
    116  vn9 = _mm256_sub_ps(vn9, vmagic_bias);
    127  __m256 vt9 = _mm256_add_ps(_mm256_mul_ps(vn9, vminus_ln2_hi), vz9);
    138  vt9 = _mm256_add_ps(_mm256_mul_ps(vn9, vminus_ln2_lo), vt9);
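This kernel targets plain AVX: without FMA, each fused step becomes a separate _mm256_mul_ps/_mm256_add_ps pair (the rr2 name marks the two-constant ln2 split that recovers some of the lost accuracy), and without 256-bit integer shifts the exponent splice runs one 128-bit half at a time, which is what the vs_lo9/vs_hi9 lines do. A sketch of that splice, with the reassembly into a 256-bit vector assumed:

    /* compile with -mavx */
    #include <immintrin.h>

    /* s := 2**n from a magic-biased n, using only 128-bit integer shifts. */
    static __m256 scale_from_biased_n_avx(__m256 vn) {
      const __m128 vs_lo = _mm_castsi128_ps(_mm_slli_epi32(
          _mm_castps_si128(_mm256_castps256_ps128(vn)), 23));
      const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(
          _mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 23));
      return _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1);
    }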