Home
last modified time | relevance | path

Searched refs:vr3 (Results 1 – 25 of 42) sorted by relevance

12

/external/XNNPACK/src/f32-sigmoid/gen/
Davx2-rr1-p5-nr2fma-x32.c110 __m256 vr3 = _mm256_rcp_ps(vd3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32() local
115 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32()
120 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32()
125 __m256 vf3 = _mm256_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32()
Davx2-rr1-p5-nr2fma-x40.c123 __m256 vr3 = _mm256_rcp_ps(vd3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40() local
129 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
135 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
141 __m256 vf3 = _mm256_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
Davx2-rr1-p5-nr2fma-x48.c136 __m256 vr3 = _mm256_rcp_ps(vd3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48() local
143 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
150 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
157 __m256 vf3 = _mm256_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
Davx512f-rr1-p5-scalef-nr1fma-x64.c102 __m512 vr3 = _mm512_rcp14_ps(vd3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64() local
107 vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64()
112 __m512 vf3 = _mm512_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64()
Davx2-rr1-p5-nr2fma-x56.c149 __m256 vr3 = _mm256_rcp_ps(vd3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56() local
157 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
165 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
173 __m256 vf3 = _mm256_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
Davx-rr2-p5-nr2-x32.c125 __m256 vr3 = _mm256_rcp_ps(vd3); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32() local
133 vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32()
134 vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32()
139 __m256 vf3 = _mm256_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x64.c111 __m512 vr3 = _mm512_rcp14_ps(vd3); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64() local
116 vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64()
121 __m512 vf3 = _mm512_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x64.c105 __m512 vr3 = _mm512_rcp14_ps(vd3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64() local
110 vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64()
115 __m512 vf3 = _mm512_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64()
Davx2-rr1-p5-nr2fma-x64.c162 __m256 vr3 = _mm256_rcp_ps(vd3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() local
171 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
180 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
189 __m256 vf3 = _mm256_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
Davx2-rr1-p5-nr2fma-x72.c175 __m256 vr3 = _mm256_rcp_ps(vd3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() local
185 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
195 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
205 __m256 vf3 = _mm256_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x80.c117 __m512 vr3 = _mm512_rcp14_ps(vd3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80() local
123 vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
129 __m512 vf3 = _mm512_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
Davx512f-rr1-p5-scalef-nr1fma-x80.c114 __m512 vr3 = _mm512_rcp14_ps(vd3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80() local
120 vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80()
126 __m512 vf3 = _mm512_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80()
Davx2-rr1-p5-nr1fma-x32.c110 __m256 vr3 = _mm256_rcp_ps(vd3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32() local
115 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32()
121 __m256 vf3 = _mm256_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x80.c123 __m512 vr3 = _mm512_rcp14_ps(vd3); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() local
129 vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
135 __m512 vf3 = _mm512_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
Davx-rr2-p5-nr2-x40.c141 __m256 vr3 = _mm256_rcp_ps(vd3); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40() local
150 vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
151 vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
158 __m256 vf3 = _mm256_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
Davx2-rr1-p5-nr2fma-x80.c188 __m256 vr3 = _mm256_rcp_ps(vd3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() local
199 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
210 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
221 __m256 vf3 = _mm256_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
Davx512f-rr1-p5-scalef-nr1fma-x96.c126 __m512 vr3 = _mm512_rcp14_ps(vd3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96() local
133 vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96()
140 __m512 vf3 = _mm512_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96()
Davx2-rr1-p5-nr1fma-x40.c123 __m256 vr3 = _mm256_rcp_ps(vd3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40() local
129 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40()
136 __m256 vf3 = _mm256_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x96.c135 __m512 vr3 = _mm512_rcp14_ps(vd3); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96() local
142 vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
149 __m512 vf3 = _mm512_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x96.c129 __m512 vr3 = _mm512_rcp14_ps(vd3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96() local
136 vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96()
143 __m512 vf3 = _mm512_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96()
Davx-rr2-p5-nr2-x48.c157 __m256 vr3 = _mm256_rcp_ps(vd3); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48() local
167 vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
168 vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
177 __m256 vf3 = _mm256_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
Davx2-rr1-p5-nr1fma-x48.c136 __m256 vr3 = _mm256_rcp_ps(vd3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48() local
143 vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48()
151 __m256 vf3 = _mm256_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c147 __m512 vr3 = _mm512_rcp14_ps(vd3); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() local
155 vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
163 __m512 vf3 = _mm512_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c141 __m512 vr3 = _mm512_rcp14_ps(vd3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() local
149 vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
157 __m512 vf3 = _mm512_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
Davx512f-rr1-p5-scalef-nr1fma-x112.c138 __m512 vr3 = _mm512_rcp14_ps(vd3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112() local
146 vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112()
154 __m512 vf3 = _mm512_mul_ps(ve3, vr3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112()

12