/external/XNNPACK/src/f32-velu/gen/

D | velu-scalar-rr2-lut16-p3-x6.c | in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6():
      55  const float vz5 = vx5 * vprescale;   (local)
      62  float vn5 = vz5 * vlog2e + vmagic_bias;
      93  float vt5 = vn5 * vminus_ln2_hi + vz5;
     122  if XNN_UNPREDICTABLE(vz5 <= vsat_cutoff) {

D | velu-scalar-rr2-p6-x6.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x6():
      55  const float vz5 = vx5 * vprescale;   (local)
      62  float vn5 = vz5 * vlog2e + vmagic_bias;
      82  float vt5 = vn5 * vminus_ln2_hi + vz5;
     111  if XNN_UNPREDICTABLE(vz5 <= vsat_cutoff) {

D | velu-wasm-rr2-lut16-p3-x6.c | in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6():
      55  …const float vz5 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx5 * vprescale, vsat_cutoff), 0.0…   (local)
      62  float vn5 = vz5 * vlog2e + vmagic_bias;
      93  float vt5 = vn5 * vminus_ln2_hi + vz5;

D | velu-wasm-rr2-p6-x6.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x6():
      55  …const float vz5 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx5 * vprescale, vsat_cutoff), 0.0…   (local)
      62  float vn5 = vz5 * vlog2e + vmagic_bias;
      82  float vt5 = vn5 * vminus_ln2_hi + vz5;
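For orientation: the x6 suffix on these files is the per-iteration unroll, so vz5 is the rescaled input of the sixth element, and the matched lines are the same steps of the ELU evaluation repeated per lane: prescale, range-reduce against log2(e) with a hi/lo split of ln(2), and saturate very negative inputs. A single-element C sketch of that recipe follows; the function name, the saturation cutoff, and the plain Taylor coefficients are assumptions for illustration (the generated kernels use a magic-bias trick instead of rintf/ldexpf, and tuned minimax polynomials or a 16-entry table).

```c
#include <math.h>

// Single-element sketch of the rr2 ELU recipe visible in the matches above:
// z = prescale*x, n = round(z*log2e), t = z - n*ln2 (hi/lo split), then
// expm1(z) ~= 2^n * poly(t) - 1.  The prescale/alpha/beta parameterization
// follows the usual ELU definition; constants here are illustrative.
static float elu_scalar_sketch(float x, float prescale, float alpha, float beta) {
  const float log2e        =  0x1.715476p+0f;   // log2(e)
  const float minus_ln2_hi = -0x1.62E400p-1f;   // high half of -ln(2)
  const float minus_ln2_lo = -0x1.7F7D1Cp-20f;  // low half of -ln(2)
  const float sat_cutoff   = -0x1.154246p+4f;   // ~-17.33; expm1(z) rounds to -1 below this

  if (x > 0.0f) {
    return x * beta;                            // positive half: just rescale
  }
  float z = x * prescale;
  if (z < sat_cutoff) {
    z = sat_cutoff;                             // saturate, mirroring the vsat_cutoff checks
  }
  const float n = rintf(z * log2e);
  float t = n * minus_ln2_hi + z;               // two-step Cody-Waite reduction
  t = n * minus_ln2_lo + t;

  // exp(t) on |t| <= ln(2)/2 via a degree-6 Horner polynomial (plain Taylor terms).
  float p = 1.0f / 720.0f;
  p = p * t + 1.0f / 120.0f;
  p = p * t + 1.0f / 24.0f;
  p = p * t + 1.0f / 6.0f;
  p = p * t + 0.5f;
  p = p * t + 1.0f;
  p = p * t + 1.0f;

  return alpha * (ldexpf(p, (int) n) - 1.0f);   // alpha * expm1(prescale*x)
}
```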
/external/XNNPACK/src/f16-vsigmoid/gen/

D | vsigmoid-avx2-rr1-p2-rcp-x48.c | in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x48():
      52  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      59  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);
      80  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
     129  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);

D | vsigmoid-avx2-rr1-p2-div-x48.c | in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x48():
      52  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      59  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);
      80  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
     122  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);

D | vsigmoid-avx2-rr1-p2-div-x56.c | in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x56():
      53  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      61  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);
      85  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
     133  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);

D | vsigmoid-avx2-rr1-p2-rcp-x56.c | in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x56():
      53  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      61  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);
      85  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
     141  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);

D | vsigmoid-avx2-rr1-p2-div-x64.c | in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x64():
      54  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      63  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);
      90  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
     144  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);

D | vsigmoid-avx2-rr1-p2-rcp-x64.c | in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x64():
      54  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      63  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);
      90  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
     153  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);
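The f16 kernels above do their arithmetic on widened f32 vectors (hence the __m256 types), and the matched lines are the shared skeleton of the sigmoid evaluation: force the argument negative with the sign mask, range-reduce with log2(e) and -ln(2), and later flush lanes whose exp() result would be denormal. A one-vector sketch of that rr1 recipe is below; the rounding via _mm256_round_ps, the exponent reconstruction, and the Taylor coefficients are simplifications of what the generated kernels do (they use a magic-bias trick and tuned degree-2 or degree-5 minimax polynomials).

```c
#include <immintrin.h>

// One-vector sketch of the rr1 sigmoid recipe shared by the AVX2 kernels in
// this listing.  Compile with -mavx2 -mfma.
static __m256 sigmoid_avx2_sketch(__m256 vx) {
  const __m256 vsign_mask     = _mm256_set1_ps(-0.0f);
  const __m256 vlog2e         = _mm256_set1_ps(0x1.715476p+0f);
  const __m256 vminus_ln2     = _mm256_set1_ps(-0x1.62E43p-1f);
  const __m256 vone           = _mm256_set1_ps(1.0f);
  const __m256 vdenorm_cutoff = _mm256_set1_ps(-0x1.5D589Ep+6f);  // ~ln(FLT_MIN)

  // z = -|x|, so exp(z) never overflows.
  const __m256 vz = _mm256_or_ps(vx, vsign_mask);

  // n = round(z * log2(e)); s = 2^n rebuilt through the exponent field.
  const __m256 vn = _mm256_round_ps(_mm256_mul_ps(vz, vlog2e),
                                    _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  const __m256 vs = _mm256_castsi256_ps(
      _mm256_slli_epi32(_mm256_add_epi32(_mm256_cvtps_epi32(vn), _mm256_set1_epi32(127)), 23));

  // t = z - n*ln2, the reduced argument in [-ln2/2, ln2/2].
  const __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2, vz);

  // exp(t) via a degree-5 Horner polynomial (Taylor terms, for illustration only).
  __m256 vp = _mm256_set1_ps(1.0f / 120.0f);
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(1.0f / 24.0f));
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(1.0f / 6.0f));
  vp = _mm256_fmadd_ps(vp, vt, _mm256_set1_ps(0.5f));
  vp = _mm256_fmadd_ps(vp, vt, vone);
  vp = _mm256_fmadd_ps(vp, vt, vone);

  // e = exp(z) = 2^n * exp(t);  sigmoid(z) = e / (e + 1).
  const __m256 ve = _mm256_mul_ps(vs, vp);
  const __m256 vd = _mm256_add_ps(ve, vone);
  __m256 vf = _mm256_div_ps(ve, vd);

  // Flush lanes where exp(z) would be denormal, then pick f or 1-f by the sign of x.
  vf = _mm256_andnot_ps(_mm256_cmp_ps(vz, vdenorm_cutoff, _CMP_LT_OS), vf);
  return _mm256_blendv_ps(_mm256_sub_ps(vone, vf), vf, vx);
}
```

The x48/x56/x64 suffixes are again the per-iteration unroll: six, seven, or eight such vectors per loop, with vz5 naming the sixth.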
/external/XNNPACK/src/f32-vsigmoid/gen/

D | vsigmoid-avx2-rr1-p5-div-x48.c | in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x48():
      52  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      59  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);
      80  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
     143  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);

D | vsigmoid-avx2-rr1-p5-nr1fma-x48.c | in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x48():
      52  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      59  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);
      80  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
     158  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);

D | vsigmoid-avx2-rr1-p5-div-x56.c | in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x56():
      53  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      61  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);
      85  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
     157  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);

D | vsigmoid-avx512f-rr1-p5-scalef-div-x96.c | in xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96():
      51  const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask));   (local)
      58  __m512 vn5 = _mm512_mul_ps(vz5, vlog2e);
      72  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vz5);

D | vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-div-x96.c | in xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96():
      50  const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask));   (local)
      57  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);
      78  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vz5);

D | vsigmoid-avx2-rr1-p5-nr1fma-x56.c | in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x56():
      53  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      61  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);
      85  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
     174  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);

D | vsigmoid-avx2-rr1-p5-nr2fma-x48.c | in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x48():
      52  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      59  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);
      80  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
     164  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);
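The div, rcp, nr1fma, and nr2fma suffixes in these file names describe how the final e/(e+1) quotient is evaluated: a true division, or a reciprocal estimate refined by zero, one, or two Newton-Raphson steps expressed with FMA. A hedged sketch of that refinement (my reading of the naming, not code copied from the generated files):

```c
#include <immintrin.h>

// Approximate e/d with a reciprocal estimate refined by Newton-Raphson:
// r' = r + r*(1 - d*r).  One step roughly doubles the ~12 bits of
// _mm256_rcp_ps; two steps get close to full float precision.
// Compile with -mavx -mfma.
static __m256 div_via_nr(__m256 ve, __m256 vd, int steps) {
  __m256 vr = _mm256_rcp_ps(vd);                                         // ~12-bit estimate of 1/d
  for (int i = 0; i < steps; i++) {
    const __m256 verr = _mm256_fnmadd_ps(vr, vd, _mm256_set1_ps(1.0f));  // 1 - d*r
    vr = _mm256_fmadd_ps(verr, vr, vr);                                  // r + r*(1 - d*r)
  }
  return _mm256_mul_ps(ve, vr);                                          // e * (1/d) ~= e/d
}
```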
D | vsigmoid-avx2-rr1-p5-div-x64.c | in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x64():
      54  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      63  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);
      90  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
     171  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);

D | vsigmoid-avx2-rr1-p5-div-x72.c | in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x72():
      55  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      65  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);
      95  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
     185  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);

D | vsigmoid-avx2-rr1-p5-nr1fma-x64.c | in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x64():
      54  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      63  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);
      90  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
     190  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);

D | vsigmoid-avx-rr2-p5-div-x48.c | in xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x48():
      53  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      60  __m256 vn5 = _mm256_add_ps(_mm256_mul_ps(vz5, vlog2e), vmagic_bias);
      93  __m256 vt5 = _mm256_add_ps(_mm256_mul_ps(vn5, vminus_ln2_hi), vz5);
     163  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);

D | vsigmoid-avx2-rr1-p5-nr2fma-x56.c | in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x56():
      53  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      61  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);
      85  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
     181  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);

D | vsigmoid-avx-rr2-p5-div-x56.c | in xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x56():
      54  const __m256 vz5 = _mm256_or_ps(vx5, vsign_mask);   (local)
      62  __m256 vn5 = _mm256_add_ps(_mm256_mul_ps(vz5, vlog2e), vmagic_bias);
     100  __m256 vt5 = _mm256_add_ps(_mm256_mul_ps(vn5, vminus_ln2_hi), vz5);
     180  vf5 = _mm256_andnot_ps(_mm256_cmp_ps(vz5, vdenorm_cutoff, _CMP_LT_OS), vf5);

D | vsigmoid-avx512f-rr1-p5-scalef-div-x112.c | in xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112():
      52  const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask));   (local)
      60  __m512 vn5 = _mm512_mul_ps(vz5, vlog2e);
      76  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vz5);

D | vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-div-x96.c | in xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96():
      52  const __m512 vz5 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx5), vsign_mask));   (local)
      59  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);
      80  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vz5);
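The rr2 entries above (the ones touching vminus_ln2_hi, including the non-FMA AVX kernels that spell the reduction as separate _mm256_mul_ps/_mm256_add_ps) use a two-term Cody-Waite reduction. A small scalar sketch of why the hi/lo split of ln(2) is used; the helper name and the way n and t are returned are illustrative:

```c
#include <math.h>

// rr2 ("two-term") reduction: ln2_hi keeps only the top bits of ln(2), so the
// product n*ln2_hi is exactly representable for the integer n values that can
// occur, and the rounding error is confined to the much smaller ln2_lo term.
// The constants are the standard float split of ln(2).
static float reduce_rr2(float z, float *n_out) {
  const float log2e        =  0x1.715476p+0f;
  const float minus_ln2_hi = -0x1.62E400p-1f;   // low significand bits are zero
  const float minus_ln2_lo = -0x1.7F7D1Cp-20f;
  const float n = rintf(z * log2e);
  float t = n * minus_ln2_hi + z;               // exact product, then exact-ish cancellation
  t = n * minus_ln2_lo + t;                     // fold in the remaining bits of ln(2)
  *n_out = n;
  return t;                                     // z ~= n*ln2 + t with |t| <= ln(2)/2
}
```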