/external/XNNPACK/src/f32-sigmoid/gen/
References to the local variable vr2, one record per file; the numbers on the left are line numbers within each file.
D | avx2-rr1-p5-nr2fma-x24.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24():
     96  __m256 vr2 = _mm256_rcp_ps(vd2);  (local)
    100  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    104  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    108  __m256 vf2 = _mm256_mul_ps(ve2, vr2);
|
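The record above shows the pattern shared by every avx2-...-nr2fma kernel in this listing: the kernel takes a hardware reciprocal estimate of the denominator vd2 with _mm256_rcp_ps, refines it with two FMA-based Newton-Raphson steps, and the final multiply vf2 = ve2 * vr2 replaces the division ve2/vd2. Below is a minimal standalone sketch of that refinement (our illustration, not XNNPACK source; vd and its placeholder value are made up; compile with e.g. gcc -O2 -mavx2 -mfma):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
      const __m256 vone = _mm256_set1_ps(1.0f);
      const __m256 vd   = _mm256_set1_ps(3.0f);  /* stand-in for the denominator */
      __m256 vr = _mm256_rcp_ps(vd);             /* estimate, relative error <= 1.5*2^-12 */
      /* Newton-Raphson: r <- r + r*(1 - r*d). _mm256_fnmadd_ps computes the
       * residual 1 - r*d, _mm256_fmadd_ps folds the correction back into r.
       * Each step roughly squares the relative error, so two steps are enough
       * for full single precision. */
      vr = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr, vd, vone), vr, vr);
      vr = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr, vd, vone), vr, vr);
      float out[8];
      _mm256_storeu_ps(out, vr);
      printf("1/3 ~= %.9f\n", (double) out[0]);
      return 0;
    }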
D | avx2-rr1-p5-nr2fma-x32.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32():
    109  __m256 vr2 = _mm256_rcp_ps(vd2);  (local)
    114  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    119  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    124  __m256 vf2 = _mm256_mul_ps(ve2, vr2);
|
D | avx2-rr1-p5-nr2fma-x40.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40():
    122  __m256 vr2 = _mm256_rcp_ps(vd2);  (local)
    128  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    134  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    140  __m256 vf2 = _mm256_mul_ps(ve2, vr2);
|
D | avx2-rr1-p5-nr2fma-x48.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48():
    135  __m256 vr2 = _mm256_rcp_ps(vd2);  (local)
    142  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    149  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    156  __m256 vf2 = _mm256_mul_ps(ve2, vr2);
|
D | avx-rr2-p5-nr2-x24.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24():
    108  __m256 vr2 = _mm256_rcp_ps(vd2);  (local)
    114  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    115  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    119  __m256 vf2 = _mm256_mul_ps(ve2, vr2);
|
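The avx-rr2-p5-nr2 kernels target plain AVX, which has no FMA, so the same Newton-Raphson update is written multiplicatively as r <- r*(2 - r*d); this is algebraically identical, since r*(2 - r*d) = r + r*(1 - r*d). A sketch under the same assumptions as above (vtwo is the constant 2.0f; compile with -mavx):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
      const __m256 vtwo = _mm256_set1_ps(2.0f);
      const __m256 vd   = _mm256_set1_ps(7.0f);  /* stand-in for the denominator */
      __m256 vr = _mm256_rcp_ps(vd);
      /* Two multiply/subtract Newton-Raphson steps, no FMA required. */
      vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
      vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
      float out[8];
      _mm256_storeu_ps(out, vr);
      printf("1/7 ~= %.9f\n", (double) out[0]);
      return 0;
    }

The fused form used by the FMA kernels is generally a little more accurate, because the residual 1 - r*d is computed there without an intermediate rounding.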
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x48.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48():
     98  __m512 vr2 = _mm512_rcp14_ps(vd2);  (local)
    102  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    106  __m512 vf2 = _mm512_mul_ps(ve2, vr2);
|
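The AVX-512 kernels start from _mm512_rcp14_ps instead, whose estimate is documented to have relative error below 2^-14. One FMA-based step roughly squares that to ~2^-28, past the 24-bit float significand, which is presumably why these kernels get by with a single step (nr1fma). A sketch with the same caveats as above (compile with -mavx512f):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
      const __m512 vone = _mm512_set1_ps(1.0f);
      const __m512 vd   = _mm512_set1_ps(3.0f);  /* stand-in for the denominator */
      __m512 vr = _mm512_rcp14_ps(vd);           /* ~14-bit estimate */
      /* A single fused Newton-Raphson step reaches full single precision. */
      vr = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr, vd, vone), vr, vr);
      float out[16];
      _mm512_storeu_ps(out, vr);
      printf("1/3 ~= %.9f\n", (double) out[0]);
      return 0;
    }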
D | avx512f-rr1-p5-scalef-nr1fma-x48.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48():
     89  __m512 vr2 = _mm512_rcp14_ps(vd2);  (local)
     93  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
     97  __m512 vf2 = _mm512_mul_ps(ve2, vr2);
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x48.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48():
     92  __m512 vr2 = _mm512_rcp14_ps(vd2);  (local)
     96  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    100  __m512 vf2 = _mm512_mul_ps(ve2, vr2);
|
D | avx2-rr1-p5-nr1fma-x24.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24():
     96  __m256 vr2 = _mm256_rcp_ps(vd2);  (local)
    100  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    105  __m256 vf2 = _mm256_mul_ps(ve2, vr2);
|
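The avx2-...-nr1fma kernels apply the same fused step as the nr2fma family, but only once, starting from the coarser ~12-bit _mm256_rcp_ps estimate: a little accuracy traded for one fewer FMA pair per vector. A throwaway harness (ours, not XNNPACK's) that makes the trade-off concrete by measuring the worst relative error after zero, one, and two steps (compile with -mavx2 -mfma, link with -lm):

    #include <immintrin.h>
    #include <math.h>
    #include <stdio.h>

    /* Refine an 8-lane reciprocal estimate with n Newton-Raphson steps. */
    static __m256 refine(__m256 vd, int n) {
      const __m256 vone = _mm256_set1_ps(1.0f);
      __m256 vr = _mm256_rcp_ps(vd);
      for (int i = 0; i < n; i++) {
        vr = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr, vd, vone), vr, vr);
      }
      return vr;
    }

    int main(void) {
      for (int n = 0; n <= 2; n++) {
        double max_rel = 0.0;
        /* Scan one binade; the estimate's error pattern repeats per binade. */
        for (float d = 1.0f; d < 2.0f; d += 1e-4f) {
          float out[8];
          _mm256_storeu_ps(out, refine(_mm256_set1_ps(d), n));
          double rel = fabs((double) out[0] - 1.0 / (double) d) * (double) d;
          if (rel > max_rel) max_rel = rel;
        }
        printf("%d step(s): max relative error %.3g\n", n, max_rel);
      }
      return 0;
    }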
D | avx512f-rr1-p5-scalef-nr1fma-x64.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64():
    101  __m512 vr2 = _mm512_rcp14_ps(vd2);  (local)
    106  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    111  __m512 vf2 = _mm512_mul_ps(ve2, vr2);
|
D | avx2-rr1-p5-nr2fma-x56.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56():
    148  __m256 vr2 = _mm256_rcp_ps(vd2);  (local)
    156  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    164  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    172  __m256 vf2 = _mm256_mul_ps(ve2, vr2);
|
D | avx-rr2-p5-nr2-x32.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32():
    124  __m256 vr2 = _mm256_rcp_ps(vd2);  (local)
    131  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    132  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    138  __m256 vf2 = _mm256_mul_ps(ve2, vr2);
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x64.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64():
    110  __m512 vr2 = _mm512_rcp14_ps(vd2);  (local)
    115  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    120  __m512 vf2 = _mm512_mul_ps(ve2, vr2);
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x64.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64():
    104  __m512 vr2 = _mm512_rcp14_ps(vd2);  (local)
    109  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    114  __m512 vf2 = _mm512_mul_ps(ve2, vr2);
|
D | avx2-rr1-p5-nr2fma-x64.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64():
    161  __m256 vr2 = _mm256_rcp_ps(vd2);  (local)
    170  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    179  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    188  __m256 vf2 = _mm256_mul_ps(ve2, vr2);
|
D | avx2-rr1-p5-nr2fma-x72.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72():
    174  __m256 vr2 = _mm256_rcp_ps(vd2);  (local)
    184  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    194  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    204  __m256 vf2 = _mm256_mul_ps(ve2, vr2);
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x80.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80():
    116  __m512 vr2 = _mm512_rcp14_ps(vd2);  (local)
    122  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    128  __m512 vf2 = _mm512_mul_ps(ve2, vr2);
|
D | avx512f-rr1-p5-scalef-nr1fma-x80.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80():
    113  __m512 vr2 = _mm512_rcp14_ps(vd2);  (local)
    119  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    125  __m512 vf2 = _mm512_mul_ps(ve2, vr2);
|
D | avx2-rr1-p5-nr1fma-x32.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32():
    109  __m256 vr2 = _mm256_rcp_ps(vd2);  (local)
    114  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    120  __m256 vf2 = _mm256_mul_ps(ve2, vr2);
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x80.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80():
    122  __m512 vr2 = _mm512_rcp14_ps(vd2);  (local)
    128  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    134  __m512 vf2 = _mm512_mul_ps(ve2, vr2);
|
D | avx-rr2-p5-nr2-x40.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40():
    140  __m256 vr2 = _mm256_rcp_ps(vd2);  (local)
    148  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    149  vr2 = _mm256_mul_ps(vr2, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr2, vd2)));
    157  __m256 vf2 = _mm256_mul_ps(ve2, vr2);
|
D | avx2-rr1-p5-nr2fma-x80.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80():
    187  __m256 vr2 = _mm256_rcp_ps(vd2);  (local)
    198  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    209  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    220  __m256 vf2 = _mm256_mul_ps(ve2, vr2);
|
D | avx512f-rr1-p5-scalef-nr1fma-x96.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96():
    125  __m512 vr2 = _mm512_rcp14_ps(vd2);  (local)
    132  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    139  __m512 vf2 = _mm512_mul_ps(ve2, vr2);
|
D | avx2-rr1-p5-nr1fma-x40.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40():
    122  __m256 vr2 = _mm256_rcp_ps(vd2);  (local)
    128  vr2 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    135  __m256 vf2 = _mm256_mul_ps(ve2, vr2);
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x96.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96():
    134  __m512 vr2 = _mm512_rcp14_ps(vd2);  (local)
    141  vr2 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr2, vd2, vone), vr2, vr2);
    148  __m512 vf2 = _mm512_mul_ps(ve2, vr2);
|