/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx2-rr1-p5-nr2fma-x40.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40():
    124  __m256 vr4 = _mm256_rcp_ps(vd4);  (local)
    130  vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    136  vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    142  __m256 vf4 = _mm256_mul_ps(ve4, vr4);
|
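All of the avx2-rr1-p5-nr2fma hits in this listing follow the same pattern: the sigmoid denominator vd4 (the exponential plus one in these kernels) is inverted with the coarse _mm256_rcp_ps estimate and then refined with two Newton-Raphson steps before the numerator ve4 is multiplied by the result. Newton's method for f(r) = 1/r - d gives the update r <- r*(2 - d*r) = r + r*(1 - d*r), which is exactly the FNMADD-into-FMADD pair on the hit lines. A minimal, hedged sketch of that refinement (not the XNNPACK code itself; the helper name is illustrative, and AVX2+FMA must be enabled, e.g. -mavx2 -mfma):

    #include <immintrin.h>

    /* Sketch of the nr2fma reciprocal refinement: _mm256_rcp_ps is only a
     * ~12-bit estimate of 1/d, so two Newton-Raphson steps, each written as
     * r += r*(1 - r*d), are applied to reach roughly full float precision. */
    static inline __m256 rcp_nr2fma(__m256 vd)  /* illustrative helper, not an XNNPACK symbol */
    {
      const __m256 vone = _mm256_set1_ps(1.0f);
      __m256 vr = _mm256_rcp_ps(vd);                                 /* initial estimate */
      vr = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr, vd, vone), vr, vr);  /* 1st N-R step */
      vr = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr, vd, vone), vr, vr);  /* 2nd N-R step */
      return vr;  /* the kernels then compute vf = ve * vr instead of dividing */
    }

The motivation is presumably to avoid _mm256_div_ps, whose latency and throughput are considerably worse than a reciprocal estimate plus a couple of FMAs on the targeted cores.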
D | avx2-rr1-p5-nr2fma-x48.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48():
    137  __m256 vr4 = _mm256_rcp_ps(vd4);  (local)
    144  vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    151  vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    158  __m256 vf4 = _mm256_mul_ps(ve4, vr4);
|
D | avx2-rr1-p5-nr2fma-x56.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56():
    150  __m256 vr4 = _mm256_rcp_ps(vd4);  (local)
    158  vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    166  vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    174  __m256 vf4 = _mm256_mul_ps(ve4, vr4);
|
D | avx2-rr1-p5-nr2fma-x64.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64():
    163  __m256 vr4 = _mm256_rcp_ps(vd4);  (local)
    172  vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    181  vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    190  __m256 vf4 = _mm256_mul_ps(ve4, vr4);
|
D | avx2-rr1-p5-nr2fma-x72.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72():
    176  __m256 vr4 = _mm256_rcp_ps(vd4);  (local)
    186  vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    196  vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    206  __m256 vf4 = _mm256_mul_ps(ve4, vr4);
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x80.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80():
    118  __m512 vr4 = _mm512_rcp14_ps(vd4);  (local)
    124  vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    130  __m512 vf4 = _mm512_mul_ps(ve4, vr4);
|
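The AVX-512 nr1fma kernels use the same scheme but start from _mm512_rcp14_ps, which is already accurate to about 14 bits, so a single FMA-based Newton-Raphson step suffices (the avx2-rr1-p5-nr1fma entries further down do the analogous single step on __m256 after _mm256_rcp_ps, trading a little accuracy for one fewer FMA). A hedged sketch under the same caveats, requiring AVX-512F:

    #include <immintrin.h>

    /* Sketch of the nr1fma variant: one Newton-Raphson step on the ~14-bit
     * rcp14 estimate is enough to approach full float precision. */
    static inline __m512 rcp_nr1fma(__m512 vd)  /* illustrative helper, not an XNNPACK symbol */
    {
      const __m512 vone = _mm512_set1_ps(1.0f);
      __m512 vr = _mm512_rcp14_ps(vd);                               /* ~14-bit estimate */
      vr = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr, vd, vone), vr, vr);  /* single N-R step */
      return vr;
    }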
D | avx512f-rr1-p5-scalef-nr1fma-x80.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80():
    115  __m512 vr4 = _mm512_rcp14_ps(vd4);  (local)
    121  vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    127  __m512 vf4 = _mm512_mul_ps(ve4, vr4);
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x80.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80():
    124  __m512 vr4 = _mm512_rcp14_ps(vd4);  (local)
    130  vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    136  __m512 vf4 = _mm512_mul_ps(ve4, vr4);
|
D | avx-rr2-p5-nr2-x40.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40():
    142  __m256 vr4 = _mm256_rcp_ps(vd4);  (local)
    152  vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4)));
    153  vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4)));
    159  __m256 vf4 = _mm256_mul_ps(ve4, vr4);
|
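The plain-AVX avx-rr2-p5-nr2 kernels cannot rely on FMA, so the same Newton-Raphson update shows up in its classical r*(2 - r*d) form with an explicit vtwo constant, again applied twice. A hedged sketch:

    #include <immintrin.h>

    /* Sketch of the FMA-free nr2 form: algebraically the same update as the
     * FMA version, r <- r*(2 - r*d), spelled with separate mul/sub. */
    static inline __m256 rcp_nr2(__m256 vd)  /* illustrative helper, not an XNNPACK symbol */
    {
      const __m256 vtwo = _mm256_set1_ps(2.0f);
      __m256 vr = _mm256_rcp_ps(vd);
      vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
      vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
      return vr;
    }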
D | avx2-rr1-p5-nr2fma-x80.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80():
    189  __m256 vr4 = _mm256_rcp_ps(vd4);  (local)
    200  vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    211  vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    222  __m256 vf4 = _mm256_mul_ps(ve4, vr4);
|
D | avx512f-rr1-p5-scalef-nr1fma-x96.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96():
    127  __m512 vr4 = _mm512_rcp14_ps(vd4);  (local)
    134  vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    141  __m512 vf4 = _mm512_mul_ps(ve4, vr4);
|
D | avx2-rr1-p5-nr1fma-x40.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40():
    124  __m256 vr4 = _mm256_rcp_ps(vd4);  (local)
    130  vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    137  __m256 vf4 = _mm256_mul_ps(ve4, vr4);
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x96.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96():
    136  __m512 vr4 = _mm512_rcp14_ps(vd4);  (local)
    143  vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    150  __m512 vf4 = _mm512_mul_ps(ve4, vr4);
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x96.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96():
    130  __m512 vr4 = _mm512_rcp14_ps(vd4);  (local)
    137  vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    144  __m512 vf4 = _mm512_mul_ps(ve4, vr4);
|
D | avx-rr2-p5-nr2-x48.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48():
    158  __m256 vr4 = _mm256_rcp_ps(vd4);  (local)
    169  vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4)));
    170  vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4)));
    178  __m256 vf4 = _mm256_mul_ps(ve4, vr4);
|
D | avx2-rr1-p5-nr1fma-x48.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48():
    137  __m256 vr4 = _mm256_rcp_ps(vd4);  (local)
    144  vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    152  __m256 vf4 = _mm256_mul_ps(ve4, vr4);
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112():
    148  __m512 vr4 = _mm512_rcp14_ps(vd4);  (local)
    156  vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    164  __m512 vf4 = _mm512_mul_ps(ve4, vr4);
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112():
    142  __m512 vr4 = _mm512_rcp14_ps(vd4);  (local)
    150  vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    158  __m512 vf4 = _mm512_mul_ps(ve4, vr4);
|
D | avx512f-rr1-p5-scalef-nr1fma-x112.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112():
    139  __m512 vr4 = _mm512_rcp14_ps(vd4);  (local)
    147  vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    155  __m512 vf4 = _mm512_mul_ps(ve4, vr4);
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x128.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128():
    154  __m512 vr4 = _mm512_rcp14_ps(vd4);  (local)
    163  vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    172  __m512 vf4 = _mm512_mul_ps(ve4, vr4);
|
D | avx512f-rr1-p5-scalef-nr1fma-x128.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128():
    151  __m512 vr4 = _mm512_rcp14_ps(vd4);  (local)
    160  vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    169  __m512 vf4 = _mm512_mul_ps(ve4, vr4);
|
D | avx-rr2-p5-nr2-x56.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56():
    174  __m256 vr4 = _mm256_rcp_ps(vd4);  (local)
    186  vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4)));
    187  vr4 = _mm256_mul_ps(vr4, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr4, vd4)));
    197  __m256 vf4 = _mm256_mul_ps(ve4, vr4);
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x128.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128():
    160  __m512 vr4 = _mm512_rcp14_ps(vd4);  (local)
    169  vr4 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    178  __m512 vf4 = _mm512_mul_ps(ve4, vr4);
|
D | avx2-rr1-p5-nr1fma-x56.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56():
    150  __m256 vr4 = _mm256_rcp_ps(vd4);  (local)
    158  vr4 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr4, vd4, vone), vr4, vr4);
    167  __m256 vf4 = _mm256_mul_ps(ve4, vr4);
|
/external/aac/libFDK/src/ |
D | fft.cpp |
    in fft_16():
        741   FIXP_DBL vr4, ur4;  (local)
        769   vr4 = (x[6] >> 1) + (x[22] >> 1); /* Re A + Re B */
        809   x[24] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
        810   x[28] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
        813   vr4 -= x[22]; /* Re A - Re B */
        821   x[26] = ui3 + vr4; /* Re B' = Im C - Im D + Re A - Re B */
        822   x[30] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
    in fft_32():
        1018  FIXP_DBL vr4, ur4;  (local)
        1046  vr4 = (x[6] + x[38]) >> 1; /* Re A + Re B */
        1095  x[48] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    [all …]
|
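In the libFDK hits, vr4 is a fixed-point temporary inside the radix-4 butterflies of fft_16() and fft_32(): inputs are pre-shifted by one bit so that partial sums such as Re A + Re B cannot overflow, and the source comments describe how the butterfly outputs A'..D' are built from them. A simplified, hedged sketch of such a butterfly on pre-scaled values (twiddle factors and libFDK's SHIFT_A/SHIFT_B scaling are omitted; this is not the fft.cpp code, and the fixed-point format is assumed):

    #include <stdint.h>

    typedef int32_t FIXP_DBL;  /* libFDK 32-bit fixed-point type (assumed here) */

    /* Radix-4 DIT butterfly without twiddles:
     *   A' = (A+B) + (C+D)      C' = (A+B) - (C+D)
     *   B' = (A-B) - j(C-D)     D' = (A-B) + j(C-D)
     * Inputs are halved (>>1) before summing to stay in range, mirroring the
     * (x[...] >> 1) + (x[...] >> 1) pattern in the hits above. */
    static void radix4_butterfly(FIXP_DBL re[4], FIXP_DBL im[4])
    {
      FIXP_DBL vr = (re[0] >> 1) + (re[1] >> 1);  /* Re A + Re B */
      FIXP_DBL vi = (im[0] >> 1) + (im[1] >> 1);  /* Im A + Im B */
      FIXP_DBL ur = (re[0] >> 1) - (re[1] >> 1);  /* Re A - Re B */
      FIXP_DBL ui = (im[0] >> 1) - (im[1] >> 1);  /* Im A - Im B */
      FIXP_DBL wr = (re[2] >> 1) + (re[3] >> 1);  /* Re C + Re D */
      FIXP_DBL wi = (im[2] >> 1) + (im[3] >> 1);  /* Im C + Im D */
      FIXP_DBL zr = (re[2] >> 1) - (re[3] >> 1);  /* Re C - Re D */
      FIXP_DBL zi = (im[2] >> 1) - (im[3] >> 1);  /* Im C - Im D */

      re[0] = vr + wr;  im[0] = vi + wi;  /* A' */
      re[2] = vr - wr;  im[2] = vi - wi;  /* C' */
      re[1] = ur + zi;  im[1] = ui - zr;  /* B' = (A-B) - j(C-D) */
      re[3] = ur - zi;  im[3] = ui + zr;  /* D' = (A-B) + j(C-D) */
    }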