/external/XNNPACK/src/f32-sigmoid/gen/

avx2-rr1-p5-nr2fma-x32.c (xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32, vr3 local):
  110: __m256 vr3 = _mm256_rcp_ps(vd3);
  115: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  120: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  125: __m256 vf3 = _mm256_mul_ps(ve3, vr3);

avx2-rr1-p5-nr2fma-x40.c (xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40, vr3 local):
  123: __m256 vr3 = _mm256_rcp_ps(vd3);
  129: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  135: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  141: __m256 vf3 = _mm256_mul_ps(ve3, vr3);

avx2-rr1-p5-nr2fma-x48.c (xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48, vr3 local):
  136: __m256 vr3 = _mm256_rcp_ps(vd3);
  143: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  150: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  157: __m256 vf3 = _mm256_mul_ps(ve3, vr3);

avx512f-rr1-p5-scalef-nr1fma-x64.c (xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64, vr3 local):
  102: __m512 vr3 = _mm512_rcp14_ps(vd3);
  107: vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  112: __m512 vf3 = _mm512_mul_ps(ve3, vr3);

avx2-rr1-p5-nr2fma-x56.c (xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56, vr3 local):
  149: __m256 vr3 = _mm256_rcp_ps(vd3);
  157: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  165: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  173: __m256 vf3 = _mm256_mul_ps(ve3, vr3);

avx-rr2-p5-nr2-x32.c (xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32, vr3 local):
  125: __m256 vr3 = _mm256_rcp_ps(vd3);
  133: vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
  134: vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
  139: __m256 vf3 = _mm256_mul_ps(ve3, vr3);

avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x64.c (xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64, vr3 local):
  111: __m512 vr3 = _mm512_rcp14_ps(vd3);
  116: vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  121: __m512 vf3 = _mm512_mul_ps(ve3, vr3);

avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x64.c (xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64, vr3 local):
  105: __m512 vr3 = _mm512_rcp14_ps(vd3);
  110: vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  115: __m512 vf3 = _mm512_mul_ps(ve3, vr3);

avx2-rr1-p5-nr2fma-x64.c (xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64, vr3 local):
  162: __m256 vr3 = _mm256_rcp_ps(vd3);
  171: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  180: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  189: __m256 vf3 = _mm256_mul_ps(ve3, vr3);

avx2-rr1-p5-nr2fma-x72.c (xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72, vr3 local):
  175: __m256 vr3 = _mm256_rcp_ps(vd3);
  185: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  195: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  205: __m256 vf3 = _mm256_mul_ps(ve3, vr3);

avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x80.c (xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80, vr3 local):
  117: __m512 vr3 = _mm512_rcp14_ps(vd3);
  123: vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  129: __m512 vf3 = _mm512_mul_ps(ve3, vr3);

avx512f-rr1-p5-scalef-nr1fma-x80.c (xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80, vr3 local):
  114: __m512 vr3 = _mm512_rcp14_ps(vd3);
  120: vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  126: __m512 vf3 = _mm512_mul_ps(ve3, vr3);

avx2-rr1-p5-nr1fma-x32.c (xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32, vr3 local):
  110: __m256 vr3 = _mm256_rcp_ps(vd3);
  115: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  121: __m256 vf3 = _mm256_mul_ps(ve3, vr3);

avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x80.c (xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80, vr3 local):
  123: __m512 vr3 = _mm512_rcp14_ps(vd3);
  129: vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  135: __m512 vf3 = _mm512_mul_ps(ve3, vr3);

avx-rr2-p5-nr2-x40.c (xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40, vr3 local):
  141: __m256 vr3 = _mm256_rcp_ps(vd3);
  150: vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
  151: vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
  158: __m256 vf3 = _mm256_mul_ps(ve3, vr3);

avx2-rr1-p5-nr2fma-x80.c (xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80, vr3 local):
  188: __m256 vr3 = _mm256_rcp_ps(vd3);
  199: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  210: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  221: __m256 vf3 = _mm256_mul_ps(ve3, vr3);

avx512f-rr1-p5-scalef-nr1fma-x96.c (xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96, vr3 local):
  126: __m512 vr3 = _mm512_rcp14_ps(vd3);
  133: vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  140: __m512 vf3 = _mm512_mul_ps(ve3, vr3);

avx2-rr1-p5-nr1fma-x40.c (xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40, vr3 local):
  123: __m256 vr3 = _mm256_rcp_ps(vd3);
  129: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  136: __m256 vf3 = _mm256_mul_ps(ve3, vr3);

avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x96.c (xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96, vr3 local):
  135: __m512 vr3 = _mm512_rcp14_ps(vd3);
  142: vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  149: __m512 vf3 = _mm512_mul_ps(ve3, vr3);

avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x96.c (xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96, vr3 local):
  129: __m512 vr3 = _mm512_rcp14_ps(vd3);
  136: vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  143: __m512 vf3 = _mm512_mul_ps(ve3, vr3);

avx-rr2-p5-nr2-x48.c (xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48, vr3 local):
  157: __m256 vr3 = _mm256_rcp_ps(vd3);
  167: vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
  168: vr3 = _mm256_mul_ps(vr3, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr3, vd3)));
  177: __m256 vf3 = _mm256_mul_ps(ve3, vr3);

avx2-rr1-p5-nr1fma-x48.c (xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48, vr3 local):
  136: __m256 vr3 = _mm256_rcp_ps(vd3);
  143: vr3 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  151: __m256 vf3 = _mm256_mul_ps(ve3, vr3);

avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c (xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112, vr3 local):
  147: __m512 vr3 = _mm512_rcp14_ps(vd3);
  155: vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  163: __m512 vf3 = _mm512_mul_ps(ve3, vr3);

avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c (xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112, vr3 local):
  141: __m512 vr3 = _mm512_rcp14_ps(vd3);
  149: vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  157: __m512 vf3 = _mm512_mul_ps(ve3, vr3);

avx512f-rr1-p5-scalef-nr1fma-x112.c (xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112, vr3 local):
  138: __m512 vr3 = _mm512_rcp14_ps(vd3);
  146: vr3 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr3, vd3, vone), vr3, vr3);
  154: __m512 vf3 = _mm512_mul_ps(ve3, vr3);

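Every hit above is the same step: after the polynomial/LUT stages, each kernel forms sigmoid(z) as e = exp(z) times the reciprocal of a denominator d, where the reciprocal starts from a hardware estimate and is sharpened by Newton-Raphson iteration. The FMA form r = fma(fnma(r, d, 1), r, r) and the two-multiply form r = r * (2 - r*d) are algebraically the same update. The sketch below is not XNNPACK source; it is a minimal standalone illustration of the two 256-bit flavors, with hypothetical helper names reciprocal_nr2fma and reciprocal_nr2, and it assumes an AVX2+FMA build (e.g. gcc -O2 -mavx2 -mfma).

#include <immintrin.h>
#include <stdio.h>

/* FMA form, as in the avx2-rr1-p5-nr2fma-* kernels:
   r <- r + r*(1 - r*d), applied twice after the _mm256_rcp_ps estimate. */
static __m256 reciprocal_nr2fma(__m256 vd) {
  const __m256 vone = _mm256_set1_ps(1.0f);
  __m256 vr = _mm256_rcp_ps(vd);                                 /* ~12-bit estimate */
  vr = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr, vd, vone), vr, vr);  /* 1st NR step */
  vr = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr, vd, vone), vr, vr);  /* 2nd NR step */
  return vr;
}

/* Non-FMA form, as in the avx-rr2-p5-nr2-* kernels: r <- r * (2 - r*d), twice. */
static __m256 reciprocal_nr2(__m256 vd) {
  const __m256 vtwo = _mm256_set1_ps(2.0f);
  __m256 vr = _mm256_rcp_ps(vd);
  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
  vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
  return vr;
}

int main(void) {
  const float d[8] = {1.5f, 2.0f, 3.0f, 4.0f, 0.5f, 7.0f, 9.0f, 1.25f};
  float a[8], b[8];
  _mm256_storeu_ps(a, reciprocal_nr2fma(_mm256_loadu_ps(d)));
  _mm256_storeu_ps(b, reciprocal_nr2(_mm256_loadu_ps(d)));
  for (int i = 0; i < 8; i++) {
    printf("1/%g: nr2fma=%.9g nr2=%.9g exact=%.9g\n", d[i], a[i], b[i], 1.0f / d[i]);
  }
  return 0;
}

The iteration count tracks the accuracy of the starting estimate: one NR step roughly squares the relative error, so the AVX512 kernels' _mm512_rcp14_ps start (relative error at most 2^-14 per Intel's documentation) reaches roughly full float precision in a single FMA step (hence nr1fma), while _mm256_rcp_ps (error up to about 1.5 * 2^-12) needs two steps for the same target. The avx2-rr1-p5-nr1fma-* variants in the list accept a slightly less accurate result in exchange for dropping the second step.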