/external/XNNPACK/src/f32-sigmoid/gen/
D | avx2-rr1-p5-div-x56.c |
    56  const __m256 vz6 = _mm256_or_ps(vx6, vsign_mask); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56() local
    64  __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
    88  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
   160  vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vz6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
|
D | avx2-rr1-p5-div-x64.c |
    57  const __m256 vz6 = _mm256_or_ps(vx6, vsign_mask); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64() local
    66  __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
    93  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
   174  vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vz6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
|
D | avx2-rr1-p5-nr1fma-x56.c |
    56  const __m256 vz6 = _mm256_or_ps(vx6, vsign_mask); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56() local
    64  __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56()
    88  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56()
   177  vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vz6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56()
|
D | avx512f-rr1-lut16-p3-perm-scalef-div-x112.c |
    56  const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() local
    64  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112()
    88  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112()
|
D | avx512f-rr1-p5-scalef-div-x112.c |
    53  const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112() local
    61  __m512 vn6 = _mm512_mul_ps(vz6, vlog2e); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112()
    77  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112()
|
D | avx-rr2-p5-div-x56.c |
    57  const __m256 vz6 = _mm256_or_ps(vx6, vsign_mask); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56() local
    65  __m256 vn6 = _mm256_add_ps(_mm256_mul_ps(vz6, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
   103  __m256 vt6 = _mm256_add_ps(_mm256_mul_ps(vn6, vminus_ln2_hi), vz6); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
   183  vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vz6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
|
D | avx2-rr1-p5-nr2fma-x56.c |
    56  const __m256 vz6 = _mm256_or_ps(vx6, vsign_mask); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56() local
    64  __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
    88  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
   184  vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vz6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56()
|
D | avx2-rr1-p5-div-x72.c |
    58  const __m256 vz6 = _mm256_or_ps(vx6, vsign_mask); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72() local
    68  __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
    98  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
   188  vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vz6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
|
D | avx2-rr1-p5-nr1fma-x64.c |
    57  const __m256 vz6 = _mm256_or_ps(vx6, vsign_mask); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() local
    66  __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
    93  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
   193  vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vz6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
|
D | avx2-rr1-p5-div-x80.c |
    59  const __m256 vz6 = _mm256_or_ps(vx6, vsign_mask); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80() local
    70  __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
   103  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
   202  vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vz6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x112.c |
    62  const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() local
    70  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112()
    94  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vz6); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112()
|
D | avx512f-rr1-p5-scalef-div-x128.c |
    54  const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() local
    63  __m512 vn6 = _mm512_mul_ps(vz6, vlog2e); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
    81  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
|
D | avx-rr2-p5-div-x64.c |
    58  const __m256 vz6 = _mm256_or_ps(vx6, vsign_mask); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64() local
    67  __m256 vn6 = _mm256_add_ps(_mm256_mul_ps(vz6, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
   110  __m256 vt6 = _mm256_add_ps(_mm256_mul_ps(vn6, vminus_ln2_hi), vz6); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
   200  vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vz6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
|
D | avx2-rr1-p5-nr2fma-x64.c |
    57  const __m256 vz6 = _mm256_or_ps(vx6, vsign_mask); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64() local
    66  __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
    93  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
   201  vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vz6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64()
|
D | avx2-rr1-p5-nr2fma-x72.c |
    58  const __m256 vz6 = _mm256_or_ps(vx6, vsign_mask); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() local
    68  __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
    98  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
   218  vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vz6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
|
D | avx2-rr1-p5-nr1fma-x72.c |
    58  const __m256 vz6 = _mm256_or_ps(vx6, vsign_mask); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() local
    68  __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
    98  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
   209  vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vz6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
|
D | avx2-rr1-p5-nr1fma-x80.c |
    59  const __m256 vz6 = _mm256_or_ps(vx6, vsign_mask); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() local
    70  __m256 vn6 = _mm256_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
   103  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
   225  vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vz6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c |
    62  const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() local
    70  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
    94  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vz6); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c |
    56  const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() local
    64  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
    88  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
|
D | avx512f-rr1-p5-scalef-nr1fma-x112.c |
    53  const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112() local
    61  __m512 vn6 = _mm512_mul_ps(vz6, vlog2e); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112()
    77  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112()
|
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x128.c |
    63  const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() local
    72  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
    99  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vz6); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
|
D | avx512f-rr1-lut16-p3-perm-scalef-div-x128.c |
    57  const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() local
    66  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
    93  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x128.c |
    57  const __m512 vz6 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx6), vsign_mask)); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() local
    66  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
    93  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
|
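Every f32-sigmoid hit above lands on the same three stages of the kernels' range reduction: the vz6 line folds lane group 6 of the input onto the non-positive half-line, the vn6 line rounds vz6/ln(2) to an integer with the magic-bias trick, the vt6 line forms the reduced argument fed to the polynomial, and the trailing vf6 line flushes lanes whose exponential would underflow. The scalar sketch below follows the rr1-p5-div recipe one lane at a time; it is not copied from the generated files, and the polynomial uses plain Taylor coefficients where the kernels use a minimax fit, so treat the constants as illustrative.

  #include <math.h>
  #include <stdint.h>
  #include <string.h>

  /* Minimal scalar sketch (not XNNPACK source) of the rr1-p5-div sigmoid recipe
   * that the vz6/vn6/vt6/vf6 lines above vectorize.  Constants are illustrative. */
  static float sigmoid_rr1_p5_div_sketch(float x) {
    /* vz = _mm256_or_ps(vx, vsign_mask): set the sign bit so z = -|x| <= 0. */
    const float z = -fabsf(x);

    /* vn = fma(vz, vlog2e, vmagic_bias): round z * log2(e) to an integer with the
     * magic-bias trick; the bias 1.5*2^23 + 127 also folds in the exponent bias. */
    const float vmagic_bias = 0x1.8000FEp23f;
    float n = fmaf(z, 0x1.715476p+0f, vmagic_bias);  /* log2(e) */

    /* vs: shift the rounded integer into the exponent field to get s = 2^n. */
    uint32_t n_bits;
    memcpy(&n_bits, &n, sizeof n_bits);
    const uint32_t s_bits = n_bits << 23;
    float s;
    memcpy(&s, &s_bits, sizeof s);
    n -= vmagic_bias;

    /* vt = fma(vn, vminus_ln2, vz): reduced argument t = z - n * ln(2). */
    const float t = fmaf(n, -0x1.62E43p-1f, z);

    /* Degree-5 polynomial for exp(t) on the reduced range (Horner with FMA);
     * Taylor coefficients here, minimax coefficients in the generated kernels. */
    float p = 1.0f / 120.0f;
    p = fmaf(p, t, 1.0f / 24.0f);
    p = fmaf(p, t, 1.0f / 6.0f);
    p = fmaf(p, t, 0.5f);
    p = fmaf(p, t, 1.0f);
    const float e = fmaf(t * s, p, s);   /* e ~= 2^n * exp(t) = exp(z) */

    /* "div" variant: sigmoid(z) = e / (e + 1). */
    float f = e / (e + 1.0f);

    /* vf = andnot(cmp(vz < vdenorm_cutoff), vf): flush lanes whose exp(z) would
     * be denormal or zero; cutoff ~ ln(2^-126), value illustrative. */
    if (z < -87.33655f) {
      f = 0.0f;
    }

    /* Final blend on the sign of x: sigmoid(x) = 1 - sigmoid(-|x|) for x > 0. */
    return signbit(x) ? f : 1.0f - f;
  }

The -nr1fma and -nr2fma files in the list differ from the -div files only at the division step, replacing e / (e + 1) with a reciprocal estimate refined by one or two Newton-Raphson FMA steps, while the avx512f -scalef files skip the bit-shift reconstruction of 2^n because _mm512_scalef_ps applies it directly, which is why their vn6 line is a plain _mm512_mul_ps without the magic bias.
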
/external/XNNPACK/src/f32-velu/gen/
D | velu-avx512f-rr1-lut16-p3-perm-x112.c |
    58  const __m512 vz6 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112() local
    66  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
   104  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
|
D | velu-avx512f-rr1-p6-x112.c |
    58  const __m512 vz6 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112() local
    66  __m512 vn6 = _mm512_fmadd_ps(vz6, vlog2e, vmagic_bias); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112()
    89  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112()
|
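The two f32-velu hits reuse the same vn6/vt6 reduction as the sigmoid kernels; what changes is how vz6 is formed: the input is multiplied by a prescale factor and clamped from below at a saturation cutoff, since ELU only needs the exponential on the negative side and expm1(z) is already -1 to within float precision beyond that cutoff. A hedged scalar sketch of that prologue follows, with expm1f standing in for the kernels' p6 or lut16-p3 polynomial machinery and an illustrative cutoff value.

  #include <math.h>

  /* Scalar sketch (not XNNPACK source) of the prologue shared by the velu kernels
   * above; prescale/alpha/beta mirror the vprescale/valpha/vbeta constants the
   * kernels load, and the saturation cutoff is illustrative. */
  static float elu_prologue_sketch(float x, float prescale, float alpha, float beta) {
    /* vz = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx, vprescale)): clamping at
     * the cutoff cannot change the result because expm1 has already saturated. */
    const float sat_cutoff = -17.328680f;   /* ~ ln(2^-25), illustrative */
    const float z = fmaxf(sat_cutoff, x * prescale);

    /* Negative side: the generated kernels run the same vn/vt range reduction as
     * the sigmoid kernels and then a polynomial for expm1; libm stands in here. */
    const float negative_branch = alpha * expm1f(z);

    /* Positive inputs bypass the exponential and are only rescaled; with
     * prescale = beta = 1 this reduces to the textbook ELU. */
    return (x > 0.0f) ? x * beta : negative_branch;
  }
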