/external/XNNPACK/src/f32-velu/gen/
D | velu-scalar-rr2-lut16-p3-x5.c | xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
    53:  const float vz4 = vx4 * vprescale;   (local)
    59:  float vn4 = vz4 * vlog2e + vmagic_bias;
    85:  float vt4 = vn4 * vminus_ln2_hi + vz4;
   109:  if XNN_UNPREDICTABLE(vz4 <= vsat_cutoff) {

D | velu-scalar-rr2-p6-x5.c | xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
    53:  const float vz4 = vx4 * vprescale;   (local)
    59:  float vn4 = vz4 * vlog2e + vmagic_bias;
    76:  float vt4 = vn4 * vminus_ln2_hi + vz4;
   100:  if XNN_UNPREDICTABLE(vz4 <= vsat_cutoff) {

D | velu-scalar-rr2-lut16-p3-x6.c | xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
    54:  const float vz4 = vx4 * vprescale;   (local)
    61:  float vn4 = vz4 * vlog2e + vmagic_bias;
    91:  float vt4 = vn4 * vminus_ln2_hi + vz4;
   117:  if XNN_UNPREDICTABLE(vz4 <= vsat_cutoff) {

D | velu-scalar-rr2-p6-x6.c | xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
    54:  const float vz4 = vx4 * vprescale;   (local)
    61:  float vn4 = vz4 * vlog2e + vmagic_bias;
    81:  float vt4 = vn4 * vminus_ln2_hi + vz4;
   107:  if XNN_UNPREDICTABLE(vz4 <= vsat_cutoff) {

D | velu-wasm-rr2-lut16-p3-x5.c | xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
    53:  …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0…   (local)
    59:  float vn4 = vz4 * vlog2e + vmagic_bias;
    85:  float vt4 = vn4 * vminus_ln2_hi + vz4;

D | velu-wasm-rr2-p6-x5.c | xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
    53:  …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0…   (local)
    59:  float vn4 = vz4 * vlog2e + vmagic_bias;
    76:  float vt4 = vn4 * vminus_ln2_hi + vz4;

D | velu-wasm-rr2-lut16-p3-x6.c | xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
    54:  …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0…   (local)
    61:  float vn4 = vz4 * vlog2e + vmagic_bias;
    91:  float vt4 = vn4 * vminus_ln2_hi + vz4;

D | velu-wasm-rr2-p6-x6.c | xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
    54:  …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0…   (local)
    61:  float vn4 = vz4 * vlog2e + vmagic_bias;
    81:  float vt4 = vn4 * vminus_ln2_hi + vz4;

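Every match above is the lane-4 slice of the same unrolled ELU loop: vz4 is the prescaled input, vn4 comes from the magic-bias rounding of vz4 * log2(e), vt4 is the two-step ("rr2") Cody-Waite reduction of vz4, and the last quoted line is the saturation handling. The scalar kernels branch on vz4 <= vsat_cutoff, while the wasm kernels instead clamp the prescaled input into [vsat_cutoff, 0] up front with __builtin_wasm_max_f32/__builtin_wasm_min_f32, so no branch is needed. A minimal scalar sketch of that structure follows; the hex constants, the libm calls, and the prescale/alpha/beta parameterization are illustrative assumptions, not the generated code, which evaluates expm1 with a p6 polynomial or a 16-entry LUT plus p3 polynomial.

#include <math.h>

/* Minimal scalar sketch of the rr2 ELU evaluation that the matches above come
 * from.  The constants and the rintf()/ldexpf()/expm1f() calls stand in for
 * the generated kernels' magic-bias rounding and p6 / lut16-p3 approximations. */
static float elu_rr2_sketch(float x, float prescale, float alpha, float beta) {
  const float log2e        =  0x1.715476p+0f;   /* log2(e)                                   */
  const float minus_ln2_hi = -0x1.62E400p-1f;   /* -ln(2) split into a high part ...         */
  const float minus_ln2_lo = -0x1.7F7D1Cp-20f;  /* ... and a low correction (Cody-Waite)     */
  const float sat_cutoff   = -0x1.154246p+4f;   /* ~ -25*ln(2): below this expm1(z) is -1    */

  const float z = x * prescale;                 /* vz = vx * vprescale                       */
  const float n = rintf(z * log2e);             /* vn: round(z / ln(2))                      */
  float t = n * minus_ln2_hi + z;               /* vt = z - n*ln(2), high part first         */
  t = n * minus_ln2_lo + t;                     /*      ... then the low-part correction     */
  float e = ldexpf(expm1f(t) + 1.0f, (int) n) - 1.0f;  /* expm1(z) = 2^n * exp(t) - 1        */
  if (z <= sat_cutoff) {                        /* scalar kernels branch on the cutoff;      */
    e = -1.0f;                                  /* wasm kernels clamp z into [cutoff, 0]     */
  }                                             /* before this point instead                 */
  return x > 0.0f ? x * beta : e * alpha;       /* final ELU blend (not shown in the matches) */
}

The x5/x6 suffixes only change how many elements each loop iteration processes; the per-lane arithmetic is identical.
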
/external/XNNPACK/src/f16-vsigmoid/gen/
D | vsigmoid-avx2-rr1-p2-div-x40.c | xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x40()
    50:  const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask);   (local)
    56:  __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias);
    74:  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4);
   110:  vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4);

D | vsigmoid-avx2-rr1-p2-rcp-x40.c | xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x40()
    50:  const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask);   (local)
    56:  __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias);
    74:  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4);
   116:  vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4);

D | vsigmoid-avx2-rr1-p2-rcp-x48.c | xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x48()
    51:  const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask);   (local)
    58:  __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias);
    79:  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4);
   128:  vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4);

D | vsigmoid-avx2-rr1-p2-div-x48.c | xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x48()
    51:  const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask);   (local)
    58:  __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias);
    79:  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4);
   121:  vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4);

D | vsigmoid-avx2-rr1-p2-div-x56.c | xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x56()
    52:  const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask);   (local)
    60:  __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias);
    84:  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4);
   132:  vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4);

D | vsigmoid-avx2-rr1-p2-rcp-x56.c | xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x56()
    52:  const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask);   (local)
    60:  __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias);
    84:  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4);
   140:  vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4);

D | vsigmoid-avx2-rr1-p2-div-x64.c | xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x64()
    53:  const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask);   (local)
    62:  __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias);
    89:  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4);
   143:  vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4);

D | vsigmoid-avx2-rr1-p2-rcp-x64.c | xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x64()
    53:  const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask);   (local)
    62:  __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias);
    89:  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4);
   152:  vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4);

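All eight f16 kernels above share one structure: vz4 = _mm256_or_ps(vx4, vsign_mask) forces the sign bit, so the exponential is always taken at a non-positive argument; vn4 and vt4 are the rr1 range reduction of that argument; and the last quoted statement zeroes any lane where vz4 fell below vdenorm_cutoff, i.e. where the exponential would land in the denormal range. The -div-/-rcp- suffixes name how e/(e + 1) is formed, and x40 through x64 is the per-iteration element count. A scalar sketch of that shape follows; plain expf() and a scalar division stand in for the f16-to-f32 conversions, the degree-2 polynomial, and the vectorized reciprocal, and the cutoff value is illustrative.

#include <math.h>

/* Scalar sketch of the sigmoid shape behind the f16 avx2-rr1-p2 kernels;
 * a simplification, not the generated code. */
static float sigmoid_sketch(float x) {
  const float denorm_cutoff = -0x1.5D589Ep+6f;  /* roughly where expf() reaches denormals */

  const float z = -fabsf(x);         /* vz = _mm256_or_ps(vx, vsign_mask): force the sign bit */
  const float e = expf(z);           /* vn, vt, ...: exp(vz) via rr1 + p2 in the kernels      */
  float f = e / (e + 1.0f);          /* sigmoid(-|x|); "div" divides, "rcp" uses an estimate  */
  if (z < denorm_cutoff) {           /* the _mm256_andnot_ps/_mm256_cmp_ps line above:        */
    f = 0.0f;                        /* flush lanes whose exponential would be denormal       */
  }
  return x > 0.0f ? 1.0f - f : f;    /* mirror back for positive inputs                       */
}
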
/external/XNNPACK/src/f32-vsigmoid/gen/
D | vsigmoid-avx2-rr1-p5-div-x40.c | xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40()
    50:  const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask);   (local)
    56:  __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias);
    74:  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4);
   128:  vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4);

D | vsigmoid-avx2-rr1-p5-nr1fma-x40.c | xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x40()
    50:  const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask);   (local)
    56:  __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias);
    74:  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4);
   141:  vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4);

D | vsigmoid-avx2-rr1-p5-div-x48.c | xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x48()
    51:  const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask);   (local)
    58:  __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias);
    79:  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4);
   142:  vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4);

D | vsigmoid-avx2-rr1-p5-nr2fma-x40.c | xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
    50:  const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask);   (local)
    56:  __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias);
    74:  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4);
   146:  vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4);

D | vsigmoid-avx512f-rr1-p5-scalef-div-x80.c | xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80()
    49:  const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask));   (local)
    55:  __m512 vn4 = _mm512_mul_ps(vz4, vlog2e);
    67:  __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vz4);

D | vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-div-x80.c | xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80()
    48:  const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask));   (local)
    54:  __m512 vn4 = _mm512_fmadd_ps(vz4, vlog2e, vmagic_bias);
    72:  __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vz4);

D | vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-div-x80.c | xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80()
    50:  const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask));   (local)
    56:  __m512 vn4 = _mm512_fmadd_ps(vz4, vlog2e, vmagic_bias);
    74:  __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_hi, vz4);

D | vsigmoid-avx2-rr1-p5-nr1fma-x48.c | xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x48()
    51:  const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask);   (local)
    58:  __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias);
    79:  __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4);
   157:  vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4);

D | vsigmoid-avx-rr2-p5-div-x40.c | xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x40()
    51:  const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask);   (local)
    57:  __m256 vn4 = _mm256_add_ps(_mm256_mul_ps(vz4, vlog2e), vmagic_bias);
    85:  __m256 vt4 = _mm256_add_ps(_mm256_mul_ps(vn4, vminus_ln2_hi), vz4);
   145:  vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4);

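The f32 kernels repeat the same recipe at degree 5 (p5) or with small permute-based LUTs, and the plain AVX variant falls back to separate multiply/add and a two-constant (rr2) ln(2) split because it cannot rely on FMA. The remaining suffixes record how the reciprocal in e/(e + 1) is produced: -div- divides exactly, while -nr1fma- and -nr2fma- refine the hardware reciprocal estimate with one or two fused Newton-Raphson steps. The avx512f -scalef- variants presumably reconstruct 2^n with the scalef instruction, which would explain why the p5-scalef kernel above computes vn4 with a plain multiply and no magic bias. Below is a generic sketch of the Newton-Raphson refinement only; the helper name and its steps parameter are hypothetical, not lifted from the generated kernels.

#include <immintrin.h>

/* Generic illustration of the -nr1fma- / -nr2fma- idea: start from the
 * ~12-bit _mm256_rcp_ps() estimate of 1/d and refine it with fused
 * Newton-Raphson steps instead of issuing an exact division. */
static inline __m256 reciprocal_nr(__m256 vd, int steps) {
  const __m256 vone = _mm256_set1_ps(1.0f);
  __m256 vr = _mm256_rcp_ps(vd);                        /* initial estimate r ~= 1/d */
  for (int i = 0; i < steps; i++) {
    const __m256 verr = _mm256_fnmadd_ps(vd, vr, vone); /* err = 1 - d*r             */
    vr = _mm256_fmadd_ps(vr, verr, vr);                 /* r  <- r + r*err           */
  }
  return vr;  /* the NR kernels then multiply, vf = ve * vr, instead of vf = ve / vd */
}

Each step roughly doubles the number of accurate bits, so one step brings the estimate close to single precision and a second step removes most of the remaining error; the -div- variants skip the refinement and divide directly.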