Home
last modified time | relevance | path

Searched refs:vd5 (Results 1 – 25 of 44) sorted by relevance

12

/external/XNNPACK/src/f32-sigmoid/gen/
Davx2-rr1-p5-nr2fma-x48.c131 const __m256 vd5 = _mm256_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48() local
138 __m256 vr5 = _mm256_rcp_ps(vd5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
145 vr5 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
152 vr5 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
Davx512f-rr1-p5-scalef-nr1fma-x96.c121 const __m512 vd5 = _mm512_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96() local
128 __m512 vr5 = _mm512_rcp14_ps(vd5); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96()
135 vr5 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96()
Davx2-rr1-p5-nr2fma-x56.c143 const __m256 vd5 = _mm256_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56() local
151 __m256 vr5 = _mm256_rcp_ps(vd5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
159 vr5 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
167 vr5 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x96.c130 const __m512 vd5 = _mm512_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96() local
137 __m512 vr5 = _mm512_rcp14_ps(vd5); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
144 vr5 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x96.c124 const __m512 vd5 = _mm512_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96() local
131 __m512 vr5 = _mm512_rcp14_ps(vd5); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96()
138 vr5 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96()
Davx-rr2-p5-nr2-x48.c152 const __m256 vd5 = _mm256_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48() local
159 __m256 vr5 = _mm256_rcp_ps(vd5); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
171 vr5 = _mm256_mul_ps(vr5, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr5, vd5))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
172 vr5 = _mm256_mul_ps(vr5, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr5, vd5))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
Davx2-rr1-p5-nr2fma-x64.c155 const __m256 vd5 = _mm256_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() local
164 __m256 vr5 = _mm256_rcp_ps(vd5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
173 vr5 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
182 vr5 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
Davx2-rr1-p5-nr2fma-x72.c167 const __m256 vd5 = _mm256_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() local
177 __m256 vr5 = _mm256_rcp_ps(vd5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
187 vr5 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
197 vr5 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
Davx2-rr1-p5-nr1fma-x48.c131 const __m256 vd5 = _mm256_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48() local
138 __m256 vr5 = _mm256_rcp_ps(vd5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48()
145 vr5 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c141 const __m512 vd5 = _mm512_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() local
149 __m512 vr5 = _mm512_rcp14_ps(vd5); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
157 vr5 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c135 const __m512 vd5 = _mm512_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() local
143 __m512 vr5 = _mm512_rcp14_ps(vd5); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
151 vr5 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
Davx512f-rr1-p5-scalef-nr1fma-x112.c132 const __m512 vd5 = _mm512_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112() local
140 __m512 vr5 = _mm512_rcp14_ps(vd5); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112()
148 vr5 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x128.c146 const __m512 vd5 = _mm512_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() local
155 __m512 vr5 = _mm512_rcp14_ps(vd5); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
164 vr5 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
Davx512f-rr1-p5-scalef-nr1fma-x128.c143 const __m512 vd5 = _mm512_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() local
152 __m512 vr5 = _mm512_rcp14_ps(vd5); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128()
161 vr5 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128()
Davx-rr2-p5-nr2-x56.c167 const __m256 vd5 = _mm256_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56() local
175 __m256 vr5 = _mm256_rcp_ps(vd5); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
188 vr5 = _mm256_mul_ps(vr5, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr5, vd5))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
189 vr5 = _mm256_mul_ps(vr5, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr5, vd5))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
Davx2-rr1-p5-nr2fma-x80.c179 const __m256 vd5 = _mm256_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() local
190 __m256 vr5 = _mm256_rcp_ps(vd5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
201 vr5 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
212 vr5 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x128.c152 const __m512 vd5 = _mm512_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() local
161 __m512 vr5 = _mm512_rcp14_ps(vd5); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
170 vr5 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
Davx2-rr1-p5-nr1fma-x56.c143 const __m256 vd5 = _mm256_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56() local
151 __m256 vr5 = _mm256_rcp_ps(vd5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56()
159 vr5 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56()
Davx-rr2-p5-nr2-x64.c182 const __m256 vd5 = _mm256_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64() local
191 __m256 vr5 = _mm256_rcp_ps(vd5); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64()
205 vr5 = _mm256_mul_ps(vr5, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr5, vd5))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64()
206 vr5 = _mm256_mul_ps(vr5, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr5, vd5))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64()
Davx2-rr1-p5-nr1fma-x64.c155 const __m256 vd5 = _mm256_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() local
164 __m256 vr5 = _mm256_rcp_ps(vd5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
173 vr5 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
Davx512f-rr1-lut16-p3-perm-scalef-div-x96.c124 const __m512 vd5 = _mm512_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() local
131 __m512 vf5 = _mm512_div_ps(ve5, vd5); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96()
Davx512f-rr1-p5-scalef-div-x96.c121 const __m512 vd5 = _mm512_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96() local
128 __m512 vf5 = _mm512_div_ps(ve5, vd5); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96()
Davx512f-rr2-lut32-p2-perm2-scalef-div-x96.c130 const __m512 vd5 = _mm512_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() local
137 __m512 vf5 = _mm512_div_ps(ve5, vd5); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96()
Davx-rr2-p5-nr2-x72.c197 const __m256 vd5 = _mm256_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72() local
207 __m256 vr5 = _mm256_rcp_ps(vd5); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72()
222 vr5 = _mm256_mul_ps(vr5, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr5, vd5))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72()
223 vr5 = _mm256_mul_ps(vr5, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr5, vd5))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72()
Davx2-rr1-p5-nr1fma-x72.c167 const __m256 vd5 = _mm256_add_ps(ve5, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() local
177 __m256 vr5 = _mm256_rcp_ps(vd5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
187 vr5 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr5, vd5, vone), vr5, vr5); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()

12