Home
last modified time | relevance | path

Searched refs: vr4 (Results 1 – 25 of 37) sorted by relevance

12

/external/XNNPACK/src/f32-sigmoid/gen/
Davx2-rr1-p5-nr2fma-x40.c124 __m256 vr4 = _mm256_rcp_ps(vd4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40() local
130 vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
136 vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
142 __m256 vf4 = _mm256_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
Davx2-rr1-p5-nr2fma-x48.c137 __m256 vr4 = _mm256_rcp_ps(vd4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48() local
144 vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
151 vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
158 __m256 vf4 = _mm256_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
Davx2-rr1-p5-nr2fma-x56.c150 __m256 vr4 = _mm256_rcp_ps(vd4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56() local
158 vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
166 vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
174 __m256 vf4 = _mm256_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
Davx2-rr1-p5-nr2fma-x64.c163 __m256 vr4 = _mm256_rcp_ps(vd4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() local
172 vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
181 vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
190 __m256 vf4 = _mm256_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
Davx2-rr1-p5-nr2fma-x72.c176 __m256 vr4 = _mm256_rcp_ps(vd4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() local
186 vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
196 vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
206 __m256 vf4 = _mm256_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x80.c118 __m512 vr4 = _mm512_rcp14_ps(vd4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80() local
124 vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
130 __m512 vf4 = _mm512_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
Davx512f-rr1-p5-scalef-nr1fma-x80.c115 __m512 vr4 = _mm512_rcp14_ps(vd4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80() local
121 vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80()
127 __m512 vf4 = _mm512_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x80.c124 __m512 vr4 = _mm512_rcp14_ps(vd4); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() local
130 vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
136 __m512 vf4 = _mm512_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
Davx-rr2-p5-nr2-x40.c142 __m256 vr4 = _mm256_rcp_ps(vd4); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40() local
152 vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
153 vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
159 __m256 vf4 = _mm256_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
Davx2-rr1-p5-nr2fma-x80.c189 __m256 vr4 = _mm256_rcp_ps(vd4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() local
200 vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
211 vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
222 __m256 vf4 = _mm256_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
Davx512f-rr1-p5-scalef-nr1fma-x96.c127 __m512 vr4 = _mm512_rcp14_ps(vd4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96() local
134 vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96()
141 __m512 vf4 = _mm512_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96()
Davx2-rr1-p5-nr1fma-x40.c124 __m256 vr4 = _mm256_rcp_ps(vd4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40() local
130 vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40()
137 __m256 vf4 = _mm256_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x96.c136 __m512 vr4 = _mm512_rcp14_ps(vd4); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96() local
143 vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
150 __m512 vf4 = _mm512_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x96.c130 __m512 vr4 = _mm512_rcp14_ps(vd4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96() local
137 vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96()
144 __m512 vf4 = _mm512_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96()
Davx-rr2-p5-nr2-x48.c158 __m256 vr4 = _mm256_rcp_ps(vd4); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48() local
169 vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
170 vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
178 __m256 vf4 = _mm256_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
Davx2-rr1-p5-nr1fma-x48.c137 __m256 vr4 = _mm256_rcp_ps(vd4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48() local
144 vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48()
152 __m256 vf4 = _mm256_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c148 __m512 vr4 = _mm512_rcp14_ps(vd4); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() local
156 vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
164 __m512 vf4 = _mm512_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c142 __m512 vr4 = _mm512_rcp14_ps(vd4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() local
150 vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
158 __m512 vf4 = _mm512_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
Davx512f-rr1-p5-scalef-nr1fma-x112.c139 __m512 vr4 = _mm512_rcp14_ps(vd4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112() local
147 vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112()
155 __m512 vf4 = _mm512_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112()
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x128.c154 __m512 vr4 = _mm512_rcp14_ps(vd4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() local
163 vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
172 __m512 vf4 = _mm512_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
Davx512f-rr1-p5-scalef-nr1fma-x128.c151 __m512 vr4 = _mm512_rcp14_ps(vd4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() local
160 vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128()
169 __m512 vf4 = _mm512_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128()
Davx-rr2-p5-nr2-x56.c174 __m256 vr4 = _mm256_rcp_ps(vd4); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56() local
186 vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
187 vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
197 __m256 vf4 = _mm256_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x128.c160 __m512 vr4 = _mm512_rcp14_ps(vd4); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() local
169 vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
178 __m512 vf4 = _mm512_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
Davx2-rr1-p5-nr1fma-x56.c150 __m256 vr4 = _mm256_rcp_ps(vd4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56() local
158 vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56()
167 __m256 vf4 = _mm256_mul_ps(ve4, vr4); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56()
/external/aac/libFDK/src/
Dfft.cpp741 FIXP_DBL vr4, ur4; in fft_16() local
769 vr4 = (x[6] >> 1) + (x[22] >> 1); /* Re A + Re B */ in fft_16()
809 x[24] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ in fft_16()
810 x[28] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ in fft_16()
813 vr4 -= x[22]; /* Re A - Re B */ in fft_16()
821 x[26] = ui3 + vr4; /* Re B' = Im C - Im D + Re A - Re B */ in fft_16()
822 x[30] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */ in fft_16()
1018 FIXP_DBL vr4, ur4; in fft_32() local
1046 vr4 = (x[6] + x[38]) >> 1; /* Re A + Re B */ in fft_32()
1095 x[48] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ in fft_32()
[all …]

12