/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx2-rr1-p5-nr2fma-x56.c | 144 const __m256 vd6 = _mm256_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56() local 152 __m256 vr6 = _mm256_rcp_ps(vd6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56() 160 vr6 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56() 168 vr6 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
|
D | avx2-rr1-p5-nr2fma-x64.c | 156 const __m256 vd6 = _mm256_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() local 165 __m256 vr6 = _mm256_rcp_ps(vd6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() 174 vr6 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() 183 vr6 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
|
D | avx2-rr1-p5-nr2fma-x72.c | 168 const __m256 vd6 = _mm256_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() local 178 __m256 vr6 = _mm256_rcp_ps(vd6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() 188 vr6 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() 198 vr6 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c | 142 const __m512 vd6 = _mm512_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() local 150 __m512 vr6 = _mm512_rcp14_ps(vd6); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 158 vr6 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c | 136 const __m512 vd6 = _mm512_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() local 144 __m512 vr6 = _mm512_rcp14_ps(vd6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 152 vr6 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
|
D | avx512f-rr1-p5-scalef-nr1fma-x112.c | 133 const __m512 vd6 = _mm512_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112() local 141 __m512 vr6 = _mm512_rcp14_ps(vd6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112() 149 vr6 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112()
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x128.c | 147 const __m512 vd6 = _mm512_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() local 156 __m512 vr6 = _mm512_rcp14_ps(vd6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 165 vr6 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
|
D | avx512f-rr1-p5-scalef-nr1fma-x128.c | 144 const __m512 vd6 = _mm512_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() local 153 __m512 vr6 = _mm512_rcp14_ps(vd6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128() 162 vr6 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128()
|
D | avx-rr2-p5-nr2-x56.c | 168 const __m256 vd6 = _mm256_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56() local 176 __m256 vr6 = _mm256_rcp_ps(vd6); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56() 190 vr6 = _mm256_mul_ps(vr6, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr6, vd6))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56() 191 vr6 = _mm256_mul_ps(vr6, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr6, vd6))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
|
D | avx2-rr1-p5-nr2fma-x80.c | 180 const __m256 vd6 = _mm256_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() local 191 __m256 vr6 = _mm256_rcp_ps(vd6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() 202 vr6 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() 213 vr6 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x128.c | 153 const __m512 vd6 = _mm512_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() local 162 __m512 vr6 = _mm512_rcp14_ps(vd6); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 171 vr6 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
|
D | avx2-rr1-p5-nr1fma-x56.c | 144 const __m256 vd6 = _mm256_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56() local 152 __m256 vr6 = _mm256_rcp_ps(vd6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56() 160 vr6 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56()
|
D | avx-rr2-p5-nr2-x64.c | 183 const __m256 vd6 = _mm256_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64() local 192 __m256 vr6 = _mm256_rcp_ps(vd6); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64() 207 vr6 = _mm256_mul_ps(vr6, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr6, vd6))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64() 208 vr6 = _mm256_mul_ps(vr6, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr6, vd6))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64()
|
D | avx2-rr1-p5-nr1fma-x64.c | 156 const __m256 vd6 = _mm256_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() local 165 __m256 vr6 = _mm256_rcp_ps(vd6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() 174 vr6 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
|
D | avx-rr2-p5-nr2-x72.c | 198 const __m256 vd6 = _mm256_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72() local 208 __m256 vr6 = _mm256_rcp_ps(vd6); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72() 224 vr6 = _mm256_mul_ps(vr6, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr6, vd6))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72() 225 vr6 = _mm256_mul_ps(vr6, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr6, vd6))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72()
|
D | avx2-rr1-p5-nr1fma-x72.c | 168 const __m256 vd6 = _mm256_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() local 178 __m256 vr6 = _mm256_rcp_ps(vd6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() 188 vr6 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
|
D | avx2-rr1-p5-nr1fma-x80.c | 180 const __m256 vd6 = _mm256_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() local 191 __m256 vr6 = _mm256_rcp_ps(vd6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() 202 vr6 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr6, vd6, vone), vr6, vr6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
|
D | avx512f-rr1-lut16-p3-perm-scalef-div-x112.c | 136 const __m512 vd6 = _mm512_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() local 144 __m512 vf6 = _mm512_div_ps(ve6, vd6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112()
|
D | avx512f-rr1-p5-scalef-div-x112.c | 133 const __m512 vd6 = _mm512_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112() local 141 __m512 vf6 = _mm512_div_ps(ve6, vd6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112()
|
D | avx-rr2-p5-nr2-x80.c | 213 const __m256 vd6 = _mm256_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80() local 224 __m256 vr6 = _mm256_rcp_ps(vd6); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80() 241 vr6 = _mm256_mul_ps(vr6, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr6, vd6))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80() 242 vr6 = _mm256_mul_ps(vr6, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr6, vd6))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x112.c | 142 const __m512 vd6 = _mm512_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() local 150 __m512 vf6 = _mm512_div_ps(ve6, vd6); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112()
|
D | avx512f-rr1-p5-scalef-div-x128.c | 144 const __m512 vd6 = _mm512_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() local 153 __m512 vf6 = _mm512_div_ps(ve6, vd6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x128.c | 153 const __m512 vd6 = _mm512_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() local 162 __m512 vf6 = _mm512_div_ps(ve6, vd6); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
|
D | avx512f-rr1-lut16-p3-perm-scalef-div-x128.c | 147 const __m512 vd6 = _mm512_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() local 156 __m512 vf6 = _mm512_div_ps(ve6, vd6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
|
D | avx2-rr1-p5-div-x56.c | 144 const __m256 vd6 = _mm256_add_ps(ve6, vone); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56() local 152 __m256 vf6 = _mm256_div_ps(ve6, vd6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
|