/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx2-rr1-p5-nr2fma-x16.c | 82 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16() local 85 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16() 88 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16() 91 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16()
|
D | avx2-rr1-p5-nr2fma-x24.c | 95 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24() local 99 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24() 103 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24() 107 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24()
|
D | avx2-rr1-p5-nr2fma-x32.c | 108 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32() local 113 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32() 118 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32() 123 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32()
|
D | avx2-rr1-p5-nr2fma-x40.c | 121 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40() local 127 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40() 133 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40() 139 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
|
D | avx-rr2-p5-nr2-x16.c | 91 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16() local 95 vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16() 96 vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16() 99 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16()
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x32.c | 79 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32() local 82 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32() 85 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32()
|
D | avx512f-rr1-p5-scalef-nr1fma-x32.c | 76 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32() local 79 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32() 82 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x32.c | 85 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32() local 88 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32() 91 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32()
|
D | avx2-rr1-p5-nr2fma-x48.c | 134 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48() local 141 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48() 148 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48() 155 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48()
|
D | avx2-rr1-p5-nr1fma-x16.c | 82 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16() local 85 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16() 89 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16()
|
D | avx-rr2-p5-nr2-x24.c | 107 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24() local 112 vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24() 113 vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24() 118 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x48.c | 97 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48() local 101 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48() 105 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48()
|
D | avx512f-rr1-p5-scalef-nr1fma-x48.c | 88 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48() local 92 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48() 96 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48()
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x48.c | 91 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48() local 95 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48() 99 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48()
|
D | avx2-rr1-p5-nr1fma-x24.c | 95 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24() local 99 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24() 104 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24()
|
D | avx512f-rr1-p5-scalef-nr1fma-x64.c | 100 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64() local 105 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64() 110 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64()
|
D | avx2-rr1-p5-nr2fma-x56.c | 147 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56() local 155 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56() 163 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56() 171 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
|
D | avx-rr2-p5-nr2-x32.c | 123 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32() local 129 vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32() 130 vr1 = _mm256_mul_ps(vr1, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr1, vd1))); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32() 137 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x64.c | 109 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64() local 114 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64() 119 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64()
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x64.c | 103 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64() local 108 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64() 113 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64()
|
D | avx2-rr1-p5-nr2fma-x64.c | 160 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() local 169 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() 178 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() 187 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
|
D | avx2-rr1-p5-nr2fma-x72.c | 173 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() local 183 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() 193 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() 203 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x80.c | 115 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80() local 121 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80() 127 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
|
D | avx512f-rr1-p5-scalef-nr1fma-x80.c | 112 __m512 vr1 = _mm512_rcp14_ps(vd1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80() local 118 vr1 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80() 124 __m512 vf1 = _mm512_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80()
|
D | avx2-rr1-p5-nr1fma-x32.c | 108 __m256 vr1 = _mm256_rcp_ps(vd1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32() local 113 vr1 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr1, vd1, vone), vr1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32() 119 __m256 vf1 = _mm256_mul_ps(ve1, vr1); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32()
|