
Searched refs:vn6 (Results 1 – 25 of 148) sorted by relevance


/external/XNNPACK/src/f32-sigmoid/gen/
avx512f-rr2-lut32-p2-perm2-scalef-div-x112.c, in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112():
   70: __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  (declaration)
   78: const __m512 vl6 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn6), vtable_hi);
   86: vn6 = _mm512_sub_ps(vn6, vmagic_bias);
   94: __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vz6);
  102: vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
  134: const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
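The six matches above trace one full pass of this kernel's technique: round z*log2(e) to a multiple of 1/32 with a magic bias, fetch 2^(i/32) from the low 5 mantissa bits via a two-register permute, undo the bias, reduce the argument with a two-constant ("rr2") ln2 split, and rebuild with scalef. The scalar model below is a sketch of that flow, not the XNNPACK source: the Taylor factor (1 + t + t*t/2) stands in for the minimax p2 coefficients, exp2f stands in for the 32-entry table, scalbnf models _mm512_scalef_ps, and the saturation/flush handling of the real kernel is omitted.

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    static float sigmoid_rr2_lut32_p2_model(float x) {
      const float log2e        = 0x1.715476p+0f;
      const float magic_bias   = 0x1.800000p+18f;  /* rounds z*log2e to 1/32 steps */
      const float minus_ln2_hi = -0x1.62E430p-1f;  /* hi/lo ln2 split: the "rr2" part */
      const float minus_ln2_lo =  0x1.05C61p-29f;

      const float z = -fabsf(x);            /* keep the exp argument non-positive */
      float vn = z * log2e + magic_bias;    /* biased n; low 5 bits = table index */

      uint32_t bits;
      memcpy(&bits, &vn, sizeof bits);
      const float vl = exp2f((float)(bits & 31) * 0x1.0p-5f);  /* models the LUT */

      vn -= magic_bias;                     /* n, a multiple of 1/32 */
      float vt = vn * minus_ln2_hi + z;     /* t = z - n*ln2, high part... */
      vt = vn * minus_ln2_lo + vt;          /* ...plus the low-order correction */

      const float vp = vl + vl * vt * (1.0f + 0.5f * vt);  /* ~ vl * e^t */
      const float ve = scalbnf(vp, (int)floorf(vn));       /* models scalef */
      const float vf = ve / (ve + 1.0f);    /* the "div" in the kernel name */
      return x > 0.0f ? 1.0f - vf : vf;     /* undo the |x| reflection */
    }

Because scalef applies 2^floor(n) directly, the bias here needs no +127 exponent offset; contrast the slli-by-23 kernels further down, which bake that offset into their magic bias.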
avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c, in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112():
   70: __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  (declaration)
   78: const __m512 vl6 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn6), vtable_hi);
   86: vn6 = _mm512_sub_ps(vn6, vmagic_bias);
   94: __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vz6);
  102: vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
  134: const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
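The nr1fma sibling differs from the div kernel above only in the final step: the e/(e+1) division is replaced by AVX-512's ~14-bit reciprocal estimate plus one Newton-Raphson correction fused into an FMA. A minimal sketch of just that substitution (AVX-512F, compile with -mavx512f):

    #include <immintrin.h>

    /* "nr1fma": one Newton-Raphson step, r1 = r0 + r0*(1 - d*r0). */
    static __m512 reciprocal_nr1fma(__m512 vd) {
      const __m512 vone = _mm512_set1_ps(1.0f);
      __m512 vr = _mm512_rcp14_ps(vd);                     /* r0 ~ 1/d, ~14 bits */
      const __m512 verr = _mm512_fnmadd_ps(vr, vd, vone);  /* err = 1 - d*r0 */
      vr = _mm512_fmadd_ps(verr, vr, vr);                  /* r1 = r0 + r0*err */
      return vr;  /* sigmoid = mul(e, reciprocal_nr1fma(e + 1)) */
    }

One step roughly doubles the correct bits (14 to ~28, capped by float precision), trading the higher-latency divide for a cheap estimate plus FMAs.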
avx512f-rr2-lut32-p2-perm2-scalef-div-x128.c, in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128():
   72: __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  (declaration)
   81: const __m512 vl6 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn6), vtable_hi);
   90: vn6 = _mm512_sub_ps(vn6, vmagic_bias);
   99: __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vz6);
  108: vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
  144: const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
avx512f-rr1-lut16-p3-perm-scalef-div-x112.c, in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112():
   64: __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  (declaration)
   72: const __m512 vl6 = _mm512_permutexvar_ps(_mm512_castps_si512(vn6), vtable);
   80: vn6 = _mm512_sub_ps(vn6, vmagic_bias);
   88: __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
  128: const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x128.c, in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128():
   72: __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  (declaration)
   81: const __m512 vl6 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn6), vtable_hi);
   90: vn6 = _mm512_sub_ps(vn6, vmagic_bias);
   99: __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vz6);
  108: vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
  144: const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c, in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112():
   64: __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  (declaration)
   72: const __m512 vl6 = _mm512_permutexvar_ps(_mm512_castps_si512(vn6), vtable);
   80: vn6 = _mm512_sub_ps(vn6, vmagic_bias);
   88: __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
  128: const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
avx512f-rr1-lut16-p3-perm-scalef-div-x128.c, in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128():
   66: __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  (declaration)
   75: const __m512 vl6 = _mm512_permutexvar_ps(_mm512_castps_si512(vn6), vtable);
   84: vn6 = _mm512_sub_ps(vn6, vmagic_bias);
   93: __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
  138: const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
avx-rr2-p5-div-x56.c, in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56():
   65: __m256 vn6 = _mm256_add_ps(_mm256_mul_ps(vz6, vlog2e), vmagic_bias);  (declaration)
   85: const __m128 vs_lo6 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn6)), 23));
   86: const __m128 vs_hi6 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn6, 1)), 23));
   95: vn6 = _mm256_sub_ps(vn6, vmagic_bias);
  103: __m256 vt6 = _mm256_add_ps(_mm256_mul_ps(vn6, vminus_ln2_hi), vz6);
  111: vt6 = _mm256_add_ps(_mm256_mul_ps(vn6, vminus_ln2_lo), vt6);
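This file is the odd one out: plain AVX has neither scalef nor a 256-bit integer shift, so lines 85-86 rebuild vs = 2^n by shifting each 128-bit half of the biased vn into the exponent field separately. A sketch of just that step (bias arithmetic around it omitted; compile with -mavx):

    #include <immintrin.h>

    /* AVX1: shift both 128-bit halves left by 23, so the biased integer in the
     * low mantissa bits lands in the IEEE-754 exponent field, then reassemble. */
    static __m256 scale_from_biased_n(__m256 vn) {
      const __m128 vs_lo = _mm_castsi128_ps(
          _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 23));
      const __m128 vs_hi = _mm_castsi128_ps(
          _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 23));
      return _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1);
    }

With AVX2 this collapses to a single _mm256_slli_epi32 over the full register, which is exactly what the avx2-p5 kernels in the later sections do.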
avx512f-rr1-p5-scalef-div-x112.c, in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112():
   61: __m512 vn6 = _mm512_mul_ps(vz6, vlog2e);  (declaration)
   69: vn6 = _mm512_roundscale_ps(vn6, 0);
   77: __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
  125: const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
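The rr1-p5-scalef kernels drop both the magic bias and the lookup table: roundscale rounds n explicitly, a single ln2 constant ("rr1") reduces the argument, a degree-5 polynomial approximates e^t, and scalef injects 2^n. A scalar sketch of the exp core; the Taylor coefficients below are stand-ins for XNNPACK's minimax p5 set:

    #include <math.h>

    static float exp_rr1_p5_model(float z) {
      const float log2e     = 0x1.715476p+0f;
      const float minus_ln2 = -0x1.62E430p-1f;  /* one constant: the "rr1" part */

      const float vn = nearbyintf(z * log2e);   /* models _mm512_roundscale_ps(vn, 0) */
      const float vt = vn * minus_ln2 + z;      /* t = z - n*ln2 */

      float vp = 0x1.111112p-7f;                /* ~1/120 */
      vp = vp * vt + 0x1.555556p-5f;            /* ~1/24  */
      vp = vp * vt + 0x1.555556p-3f;            /* ~1/6   */
      vp = vp * vt + 0.5f;
      vp = vp * vt + 1.0f;
      vp = vp * vt + 1.0f;                      /* e^t ~ 1 + t + ... + t^5/120 */
      return scalbnf(vp, (int)vn);              /* models _mm512_scalef_ps(vp, vn) */
    }

Skipping the magic bias costs an explicit round, but frees vn to be fed straight into scalef, which tolerates the full exponent range (including gradual-underflow cases a plain 2^n multiply would mishandle).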
avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x128.c, in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128():
   66: __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  (declaration)
   75: const __m512 vl6 = _mm512_permutexvar_ps(_mm512_castps_si512(vn6), vtable);
   84: vn6 = _mm512_sub_ps(vn6, vmagic_bias);
   93: __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
  138: const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
avx2-p5-x56.c, in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56():
   72: __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  (declaration)
   82: const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
   91: vn6 = _mm256_sub_ps(vn6, vmagic_bias);
  101: __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
  109: vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
avx2-p5-x64.c, in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64():
   74: __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  (declaration)
   85: const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
   95: vn6 = _mm256_sub_ps(vn6, vmagic_bias);
  106: __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
  115: vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
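These AVX2 kernels (and the raddexp family below) reconstruct 2^n without scalef: the magic bias is shaped so that after the fused round, the low mantissa bits of vn6 already hold n + 127, and the single slli by 23 in the vs6 lines moves that into the exponent field. A scalar demonstration of the trick, assuming the 0x1.8000FEp23f form of the bias (0x1.8p23 + 127) that these kernels use:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Round x to the nearest integer n and build 2^n, for n in [-126, 127],
     * with just one float add and one integer shift. */
    static float exp2_int_via_magic_bias(float x) {
      const float magic_bias = 0x1.8000FEp23f;  /* 0x1.8p23 + 127 */
      float vn = x + magic_bias;                /* rounds x to integer n */
      uint32_t bits;
      memcpy(&bits, &vn, sizeof bits);
      bits <<= 23;                              /* n + 127 -> exponent field */
      float vs;
      memcpy(&vs, &bits, sizeof vs);
      return vs;                                /* == 2^n */
    }

    int main(void) {
      for (int n = -10; n <= 3; n++) {
        printf("2^%d = %g\n", n, exp2_int_via_magic_bias((float) n));
      }
      return 0;
    }

The +127 in the bias is the IEEE-754 exponent bias; for n outside [-126, 127] the pattern wraps into garbage, which the kernels fix up by masking lanes below a denormal cutoff to zero.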
/external/XNNPACK/src/f32-raddexpminusmax/gen/
avx2-p5-x64-acc2.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2():
   73: __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  (declaration)
   84: const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
   94: vn6 = _mm256_sub_ps(vn6, vmagic_bias);
  105: __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
  114: vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
avx2-p5-x64-acc4.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4():
   75: __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  (declaration)
   86: const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
   96: vn6 = _mm256_sub_ps(vn6, vmagic_bias);
  107: __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
  116: vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
avx2-p5-x64.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64():
   72: __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  (declaration)
   83: const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
   93: vn6 = _mm256_sub_ps(vn6, vmagic_bias);
  104: __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
  113: vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
avx2-p5-x72-acc3.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3():
   76: __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  (declaration)
   88: const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
   99: vn6 = _mm256_sub_ps(vn6, vmagic_bias);
  111: __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
  121: vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
avx2-p5-x72.c, in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72():
   74: __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  (declaration)
   86: const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
   97: vn6 = _mm256_sub_ps(vn6, vmagic_bias);
  109: __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
  119: vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
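The -acc2/-acc3/-acc4 suffixes in this section (and the raddstore section below) give the number of independent accumulators the reduction runs in; splitting the sum shortens the loop-carried floating-point dependency chain and tends to reduce accumulated rounding error. A scalar sketch of the idea (hypothetical helper name):

    #include <stddef.h>

    /* Sum with four independent partial sums, combined only at the end. */
    static float radd_acc4(const float* x, size_t n) {
      float acc0 = 0.0f, acc1 = 0.0f, acc2 = 0.0f, acc3 = 0.0f;
      size_t i = 0;
      for (; i + 4 <= n; i += 4) {
        acc0 += x[i + 0];
        acc1 += x[i + 1];
        acc2 += x[i + 2];
        acc3 += x[i + 3];
      }
      for (; i < n; i++) acc0 += x[i];  /* scalar tail */
      return (acc0 + acc1) + (acc2 + acc3);
    }

The vector kernels do the same with one __m256 register per accumulator, folding the registers together after the main loop.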
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
avx2-p5-x64.c, in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64():
   73: __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  (declaration)
   84: const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
   94: vn6 = _mm256_sub_ps(vn6, vmagic_bias);
  105: __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
  114: vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
avx2-p5-x64-acc2.c, in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2():
   74: __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);  (declaration)
   85: const __m256 vs6 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn6), 23));
   95: vn6 = _mm256_sub_ps(vn6, vmagic_bias);
  106: __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);
  115: vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
/external/XNNPACK/src/f32-velu/gen/
velu-avx512f-rr1-lut16-p3-perm-x112.c, in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112():
   66: __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  (declaration)
   80: const __m512i ven6 = _mm512_slli_epi32(_mm512_castps_si512(vn6), 19);
   81: const __m512i vl6 = _mm512_permutexvar_epi32(_mm512_castps_si512(vn6), vtable);
   96: vn6 = _mm512_sub_ps(vn6, vmagic_bias);
  104: __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
velu-avx512f-rr1-lut16-p3-perm-x128.c, in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128():
   68: __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias);  (declaration)
   83: const __m512i ven6 = _mm512_slli_epi32(_mm512_castps_si512(vn6), 19);
   84: const __m512i vl6 = _mm512_permutexvar_epi32(_mm512_castps_si512(vn6), vtable);
  101: vn6 = _mm512_sub_ps(vn6, vmagic_bias);
  111: __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);
velu-avx2-rr1-lut4-p4-perm-x56.c, in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56():
   67: __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias);  (declaration)
   87: const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 21);
   88: const __m256i vl6 = _mm256_castps_si256(_mm256_permutevar_ps(vtable, _mm256_castps_si256(vn6)));
   89: vn6 = _mm256_sub_ps(vn6, vmagic_bias);
  104: __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6);
velu-avx2-rr1-lut8-p4-perm-x56.c, in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56():
   66: __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias);  (declaration)
   86: const __m256i ven6 = _mm256_slli_epi32(_mm256_castps_si256(vn6), 20);
   87: const __m256i vl6 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn6));
   88: vn6 = _mm256_sub_ps(vn6, vmagic_bias);
  103: __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6);
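Across these ELU kernels the ven6 shift count tracks the table size: 19 for the 16-entry AVX-512 table, 20 for 8 entries, 21 for 4, i.e. always 23 - log2(N). The low log2(N) bits of the fixed-point n select a 2^(i/N) table entry, and the remaining integer part is what gets shifted up toward the exponent field. A scalar sketch of that indexing (the table holds true 2^(i/16) values; the bit-level bias handling of the real kernels is elided):

    #include <math.h>
    #include <stdint.h>

    /* 2^n via a 16-entry table: the low 4 bits of the fixed-point n pick
     * 2^(frac), the rest is the integer exponent part (hence 23 - 4 = 19). */
    static float exp2_lut16(float n) {
      static const float table[16] = {  /* 2^(i/16), i = 0..15 */
        1.0f,        1.04427378f, 1.09050773f, 1.13878863f,
        1.18920712f, 1.24185781f, 1.29683955f, 1.35425550f,
        1.41421356f, 1.47682615f, 1.54221083f, 1.61049033f,
        1.68179283f, 1.75625216f, 1.83400809f, 1.91520656f,
      };
      const int32_t k = (int32_t) lrintf(n * 16.0f);  /* n in units of 1/16 */
      /* k >> 4 assumes an arithmetic right shift for negative k,
       * which mainstream compilers provide. */
      return ldexpf(table[k & 15], k >> 4);
    }

In the kernels the same split happens inside the float bit pattern: the permute consumes the low mantissa bits as the index while the slli pushes the integer part upward, and the two pieces are later merged with an integer add rather than a float multiply.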
/external/XNNPACK/src/f32-raddextexp/gen/
avx512f-p5-scalef-x128-acc2.c, in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2():
   63: const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0);  (declaration)
   74: __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   83: vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
  148: vmax_e0 = _mm512_max_ps(vmax_e0, vn6);
  159: const __m512 vdelta_e6 = _mm512_sub_ps(vn6, vmax_e0);
avx512f-p5-scalef-x128.c, in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128():
   61: const __m512 vn6 = _mm512_roundscale_ps(_mm512_mul_ps(vx6, vlog2e), 0);  (declaration)
   72: __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);
   81: vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
  146: vmax_e0 = _mm512_max_ps(vmax_e0, vn6);
  156: const __m512 vdelta_e6 = _mm512_sub_ps(vn6, vmax_e0);
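The raddextexp kernels skip the separate max pass that the raddexpminusmax family needs: each term and the running sum are kept in "extended exponent" form m * 2^e with the exponent carried as a float, and the vmax_e0/vdelta_e6 lines above renormalize everything onto the common running-max exponent with scalef before adding. A scalar sketch of that accumulation scheme (illustrative names, not XNNPACK's):

    #include <math.h>

    typedef struct { float m; float e; } extexp;  /* value = m * 2^e */

    /* Add a term (m, e) into the accumulator, renormalizing both onto the
     * larger exponent first; exp2f of a non-positive delta models scalef,
     * decaying to 0 for terms negligibly small next to the running max. */
    static extexp extexp_add(extexp acc, float m, float e) {
      const float emax = fmaxf(acc.e, e);
      const float m_sum = acc.m * exp2f(acc.e - emax) + m * exp2f(e - emax);
      return (extexp){ m_sum, emax };
    }

Here vn6 plays the role of e for each incoming term (its 2^n factor is never materialized), which keeps the reduction finite even where a plain exp(x) would overflow.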
