/external/XNNPACK/src/f32-sigmoid/gen/
D | avx2-rr1-p5-nr2fma-x16.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16():
     81  __m256 vr0 = _mm256_rcp_ps(vd0);   (local)
     84  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
     87  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
     90  __m256 vf0 = _mm256_mul_ps(ve0, vr0);

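All of the avx2 nr2fma kernels repeat this idiom per accumulator register: _mm256_rcp_ps yields a reciprocal estimate of vd0 good to roughly 12 bits, and each Newton-Raphson step r <- r + r*(1 - r*d) is expressed as one _mm256_fnmadd_ps (the residual 1 - r*d) feeding one _mm256_fmadd_ps (the update). A minimal standalone sketch of the two-step refinement; the reciprocal_nr2 helper name is illustrative, not from XNNPACK:

    #include <immintrin.h>

    // Two Newton-Raphson steps on the hardware reciprocal estimate.
    // Each step computes r + r*(1 - r*d); the relative error roughly
    // squares per step, so two steps reach full float precision.
    static inline __m256 reciprocal_nr2(__m256 vd) {
      const __m256 vone = _mm256_set1_ps(1.0f);
      __m256 vr = _mm256_rcp_ps(vd);                                 // ~12-bit estimate
      vr = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr, vd, vone), vr, vr);  // first refinement
      vr = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr, vd, vone), vr, vr);  // second refinement
      return vr;
    }

The nr1fma kernels further down are the same construction with a single refinement step.
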
D | avx2-rr1-p5-nr2fma-x24.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24():
     94  __m256 vr0 = _mm256_rcp_ps(vd0);   (local)
     98  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    102  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    106  __m256 vf0 = _mm256_mul_ps(ve0, vr0);

D | avx2-rr1-p5-nr2fma-x32.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32():
    107  __m256 vr0 = _mm256_rcp_ps(vd0);   (local)
    112  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    117  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    122  __m256 vf0 = _mm256_mul_ps(ve0, vr0);

D | avx2-rr1-p5-nr2fma-x40.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40():
    120  __m256 vr0 = _mm256_rcp_ps(vd0);   (local)
    126  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    132  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    138  __m256 vf0 = _mm256_mul_ps(ve0, vr0);

D | avx-rr2-p5-nr2-x16.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16():
     90  __m256 vr0 = _mm256_rcp_ps(vd0);   (local)
     93  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
     94  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
     98  __m256 vf0 = _mm256_mul_ps(ve0, vr0);

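The plain AVX kernels cannot use fused multiply-add, so the same Newton-Raphson update appears in its classic r <- r*(2 - r*d) form, algebraically identical to r + r*(1 - r*d) but built from mul/sub only. A sketch under the same illustrative naming as above:

    #include <immintrin.h>

    // FMA-free Newton-Raphson refinement: r <- r * (2 - r*d).
    // Two steps bring the ~12-bit _mm256_rcp_ps estimate to float precision;
    // the unfused r*d product rounds, so this is slightly less accurate
    // than the fnmadd/fmadd variant.
    static inline __m256 reciprocal_nr2_avx(__m256 vd) {
      const __m256 vtwo = _mm256_set1_ps(2.0f);
      __m256 vr = _mm256_rcp_ps(vd);
      vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
      vr = _mm256_mul_ps(vr, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr, vd)));
      return vr;
    }
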
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x32.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32():
     78  __m512 vr0 = _mm512_rcp14_ps(vd0);   (local)
     81  vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
     84  __m512 vf0 = _mm512_mul_ps(ve0, vr0);

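The AVX-512 kernels start from _mm512_rcp14_ps, whose relative error is bounded by 2^-14. Since each Newton-Raphson step roughly squares the relative error, a single fused step (about 2^-28) already exceeds the 24-bit float mantissa, which is why these kernels are all nr1fma. Again a sketch with an illustrative helper name:

    #include <immintrin.h>

    // AVX-512: one fused Newton-Raphson step suffices, because the
    // rcp14 estimate already carries ~14 correct bits.
    static inline __m512 reciprocal_nr1_avx512(__m512 vd) {
      const __m512 vone = _mm512_set1_ps(1.0f);
      __m512 vr = _mm512_rcp14_ps(vd);
      vr = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr, vd, vone), vr, vr);
      return vr;
    }
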
D | avx512f-rr1-p5-scalef-nr1fma-x32.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32():
     75  __m512 vr0 = _mm512_rcp14_ps(vd0);   (local)
     78  vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
     81  __m512 vf0 = _mm512_mul_ps(ve0, vr0);

D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x32.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32():
     84  __m512 vr0 = _mm512_rcp14_ps(vd0);   (local)
     87  vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
     90  __m512 vf0 = _mm512_mul_ps(ve0, vr0);

D | avx2-rr1-p5-nr2fma-x48.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48():
    133  __m256 vr0 = _mm256_rcp_ps(vd0);   (local)
    140  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    147  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    154  __m256 vf0 = _mm256_mul_ps(ve0, vr0);

D | avx2-rr1-p5-nr1fma-x16.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16():
     81  __m256 vr0 = _mm256_rcp_ps(vd0);   (local)
     84  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
     88  __m256 vf0 = _mm256_mul_ps(ve0, vr0);

D | avx-rr2-p5-nr2-x24.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24():
    106  __m256 vr0 = _mm256_rcp_ps(vd0);   (local)
    110  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    111  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    117  __m256 vf0 = _mm256_mul_ps(ve0, vr0);

D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x48.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48():
     96  __m512 vr0 = _mm512_rcp14_ps(vd0);   (local)
    100  vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    104  __m512 vf0 = _mm512_mul_ps(ve0, vr0);

D | avx512f-rr1-p5-scalef-nr1fma-x48.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48():
     87  __m512 vr0 = _mm512_rcp14_ps(vd0);   (local)
     91  vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
     95  __m512 vf0 = _mm512_mul_ps(ve0, vr0);

D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x48.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48():
     90  __m512 vr0 = _mm512_rcp14_ps(vd0);   (local)
     94  vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
     98  __m512 vf0 = _mm512_mul_ps(ve0, vr0);

D | avx2-rr1-p5-nr1fma-x24.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24():
     94  __m256 vr0 = _mm256_rcp_ps(vd0);   (local)
     98  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    103  __m256 vf0 = _mm256_mul_ps(ve0, vr0);

D | avx512f-rr1-p5-scalef-nr1fma-x64.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64():
     99  __m512 vr0 = _mm512_rcp14_ps(vd0);   (local)
    104  vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    109  __m512 vf0 = _mm512_mul_ps(ve0, vr0);

D | avx2-rr1-p5-nr2fma-x56.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56():
    146  __m256 vr0 = _mm256_rcp_ps(vd0);   (local)
    154  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    162  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    170  __m256 vf0 = _mm256_mul_ps(ve0, vr0);

D | avx-rr2-p5-nr2-x32.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32():
    122  __m256 vr0 = _mm256_rcp_ps(vd0);   (local)
    127  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    128  vr0 = _mm256_mul_ps(vr0, _mm256_sub_ps(vtwo, _mm256_mul_ps(vr0, vd0)));
    136  __m256 vf0 = _mm256_mul_ps(ve0, vr0);

D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x64.c | in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64():
    108  __m512 vr0 = _mm512_rcp14_ps(vd0);   (local)
    113  vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    118  __m512 vf0 = _mm512_mul_ps(ve0, vr0);

D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x64.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64():
    102  __m512 vr0 = _mm512_rcp14_ps(vd0);   (local)
    107  vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    112  __m512 vf0 = _mm512_mul_ps(ve0, vr0);

D | avx2-rr1-p5-nr2fma-x64.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64():
    159  __m256 vr0 = _mm256_rcp_ps(vd0);   (local)
    168  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    177  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    186  __m256 vf0 = _mm256_mul_ps(ve0, vr0);

D | avx2-rr1-p5-nr2fma-x72.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72():
    172  __m256 vr0 = _mm256_rcp_ps(vd0);   (local)
    182  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    192  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    202  __m256 vf0 = _mm256_mul_ps(ve0, vr0);

D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x80.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80():
    114  __m512 vr0 = _mm512_rcp14_ps(vd0);   (local)
    120  vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    126  __m512 vf0 = _mm512_mul_ps(ve0, vr0);

D | avx512f-rr1-p5-scalef-nr1fma-x80.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80():
    111  __m512 vr0 = _mm512_rcp14_ps(vd0);   (local)
    117  vr0 = _mm512_fmadd_ps(_mm512_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    123  __m512 vf0 = _mm512_mul_ps(ve0, vr0);

D | avx2-rr1-p5-nr1fma-x32.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32():
    107  __m256 vr0 = _mm256_rcp_ps(vd0);   (local)
    112  vr0 = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr0, vd0, vone), vr0, vr0);
    118  __m256 vf0 = _mm256_mul_ps(ve0, vr0);

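In every file above, the refined reciprocal feeds the same final multiply, vf0 = ve0 * vr0. Judging by the variable names and the usual XNNPACK sigmoid scheme (not fully shown in this listing), ve0 holds exp(z) for the non-positive branch z = -|x| and vd0 = ve0 + vone, so vf0 reconstructs sigmoid as e / (1 + e) without a hardware division. A scalar model of that reconstruction, labeled as an assumption rather than XNNPACK code:

    #include <math.h>

    // Scalar model of the vector pattern: e = exp(z) with z = -|x|,
    // d = e + 1, f = e * (1/d); the NR-refined reciprocal replaces 1/d.
    static float sigmoid_ref(float x) {
      const float e = expf(x <= 0.0f ? x : -x);  // exp of a non-positive value stays in (0, 1]
      const float f = e / (1.0f + e);            // e / (e + 1) == sigmoid(-|x|)
      return x <= 0.0f ? f : 1.0f - f;           // mirror the result for positive inputs
    }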