/external/XNNPACK/src/f32-vsigmoid/gen/ |
D | vsigmoid-scalar-rr2-lut2048-p1-div-x4.c |
      48  const float vz3 = fabsf(vx3); in xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x4() local
      53  float vn3 = vz3 * vminus_log2e + vmagic_bias; in xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x4()
      77  float vt3 = vn3 * vln2_hi + vz3; in xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x4()
     113  if XNN_UNPREDICTABLE(vz3 > vdenorm_cutoff) { in xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x4()
|
D | vsigmoid-scalar-rr2-p5-div-x4.c |
      48  const float vz3 = fabsf(vx3); in xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x4() local
      53  float vn3 = vz3 * vminus_log2e + vmagic_bias; in xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x4()
      68  float vt3 = vn3 * vln2_hi + vz3; in xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x4()
     124  if XNN_UNPREDICTABLE(vz3 > vdenorm_cutoff) { in xnn_f32_vsigmoid_ukernel__scalar_rr2_p5_div_x4()
|
D | vsigmoid-scalar-rr2-lut64-p2-div-x4.c |
      48  const float vz3 = fabsf(vx3); in xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x4() local
      53  float vn3 = vz3 * vminus_log2e + vmagic_bias; in xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x4()
      77  float vt3 = vn3 * vln2_hi + vz3; in xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x4()
     118  if XNN_UNPREDICTABLE(vz3 > vdenorm_cutoff) { in xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x4()
|
D | vsigmoid-avx2-rr1-p5-div-x32.c |
      48  const __m256 vz3 = _mm256_or_ps(vx3, vsign_mask); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x32() local
      53  __m256 vn3 = _mm256_fmadd_ps(vz3, vlog2e, vmagic_bias); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x32()
      68  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vz3); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x32()
     113  vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vz3, vdenorm_cutoff, _CMP_LT_OS), vf3); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x32()
|
D | vsigmoid-avx2-rr1-p5-nr2fma-x32.c |
      48  const __m256 vz3 = _mm256_or_ps(vx3, vsign_mask); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x32() local
      53  __m256 vn3 = _mm256_fmadd_ps(vz3, vlog2e, vmagic_bias); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x32()
      68  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vz3); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x32()
     128  vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vz3, vdenorm_cutoff, _CMP_LT_OS), vf3); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x32()
|
D | vsigmoid-avx2-rr1-p5-div-x40.c |
      49  const __m256 vz3 = _mm256_or_ps(vx3, vsign_mask); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40() local
      55  __m256 vn3 = _mm256_fmadd_ps(vz3, vlog2e, vmagic_bias); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40()
      73  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vz3); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40()
     127  vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vz3, vdenorm_cutoff, _CMP_LT_OS), vf3); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40()
|
D | vsigmoid-avx2-rr1-p5-nr1fma-x32.c |
      48  const __m256 vz3 = _mm256_or_ps(vx3, vsign_mask); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x32() local
      53  __m256 vn3 = _mm256_fmadd_ps(vz3, vlog2e, vmagic_bias); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x32()
      68  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vz3); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x32()
     124  vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vz3, vdenorm_cutoff, _CMP_LT_OS), vf3); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x32()
|
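The f32-vsigmoid entries above share one data flow: take the absolute value of the input, evaluate exp on the non-positive half-line through the range reduction (the vn3 and vt3 lines), form e/(1+e), flush results past the denorm cutoff to zero, and reflect by the sign of the input. A minimal scalar sketch of that flow follows; expf() stands in for the generated LUT/polynomial code, and the cutoff value is an assumed illustrative constant, not copied from the kernels.

#include <math.h>

/* Reference sketch only: the generated kernels replace expf() with a range
 * reduction plus LUT or polynomial, but the surrounding data flow matches
 * the vz3/vn3/vt3 lines indexed above. */
static float sigmoid_scalar_reference(float x) {
  const float denorm_cutoff = 0x1.5D589Ep+6f;  /* assumed illustrative value */
  const float z = fabsf(x);                    /* vz = fabsf(vx) */
  const float e = expf(-z);                    /* kernels: vn, vt, vs and a polynomial */
  float f = e / (e + 1.0f);                    /* sigmoid(-z) = e^-z / (1 + e^-z) */
  if (z > denorm_cutoff) {                     /* cf. if XNN_UNPREDICTABLE(vz > vdenorm_cutoff) */
    f = 0.0f;
  }
  return signbit(x) ? f : 1.0f - f;            /* sigmoid(x) = 1 - sigmoid(-x) for x >= 0 */
}

The avx2 variants compute the same quantity eight lanes at a time; the _div kernels divide directly, while the _nr1fma and _nr2fma kernels refine an approximate reciprocal with one or two Newton-Raphson FMA steps.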
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-scalar-rr2-lut16-p3-x4.c |
      51  const float vz3 = vx3 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4() local
      56  float vn3 = vz3 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
      77  float vt3 = vn3 * vminus_ln2_hi + vz3; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
      96  if XNN_UNPREDICTABLE(vz3 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
|
D | velu-scalar-rr2-p6-x4.c |
      51  const float vz3 = vx3 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x4() local
      56  float vn3 = vz3 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_p6_x4()
      70  float vt3 = vn3 * vminus_ln2_hi + vz3; in xnn_f32_velu_ukernel__scalar_rr2_p6_x4()
      89  if XNN_UNPREDICTABLE(vz3 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x4()
|
D | velu-scalar-rr2-lut16-p3-x5.c |
      52  const float vz3 = vx3 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5() local
      58  float vn3 = vz3 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
      83  float vt3 = vn3 * vminus_ln2_hi + vz3; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
     104  if XNN_UNPREDICTABLE(vz3 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
|
D | velu-scalar-rr2-p6-x5.c |
      52  const float vz3 = vx3 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5() local
      58  float vn3 = vz3 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
      75  float vt3 = vn3 * vminus_ln2_hi + vz3; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
      96  if XNN_UNPREDICTABLE(vz3 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
|
D | velu-scalar-rr2-lut16-p3-x6.c |
      53  const float vz3 = vx3 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() local
      60  float vn3 = vz3 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
      89  float vt3 = vn3 * vminus_ln2_hi + vz3; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
     112  if XNN_UNPREDICTABLE(vz3 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
|
D | velu-scalar-rr2-p6-x6.c |
      53  const float vz3 = vx3 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6() local
      60  float vn3 = vz3 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
      80  float vt3 = vn3 * vminus_ln2_hi + vz3; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
     103  if XNN_UNPREDICTABLE(vz3 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
|
D | velu-wasm-rr2-lut16-p3-x4.c |
      51  …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4() local
      56  float vn3 = vz3 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
      77  float vt3 = vn3 * vminus_ln2_hi + vz3; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
|
D | velu-wasm-rr2-p6-x4.c |
      51  …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x4() local
      56  float vn3 = vz3 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_p6_x4()
      70  float vt3 = vn3 * vminus_ln2_hi + vz3; in xnn_f32_velu_ukernel__wasm_rr2_p6_x4()
|
D | velu-wasm-rr2-lut16-p3-x5.c |
      52  …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5() local
      58  float vn3 = vz3 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
      83  float vt3 = vn3 * vminus_ln2_hi + vz3; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
|
D | velu-wasm-rr2-p6-x5.c |
      52  …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x5() local
      58  float vn3 = vz3 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
      75  float vt3 = vn3 * vminus_ln2_hi + vz3; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
|
D | velu-wasm-rr2-lut16-p3-x6.c |
      53  …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6() local
      60  float vn3 = vz3 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
      89  float vt3 = vn3 * vminus_ln2_hi + vz3; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
|
D | velu-wasm-rr2-p6-x6.c |
      53  …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x6() local
      60  float vn3 = vz3 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
      80  float vt3 = vn3 * vminus_ln2_hi + vz3; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
|
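The f32-velu entries follow the extended ELU form used by these kernels: the input is prescaled (vz = vx * vprescale), exp(vz) - 1 is evaluated on the negative side via the same range reduction, positive inputs are passed through scaled by beta, and the vsat_cutoff check handles very negative arguments, where the result saturates to -alpha. A minimal scalar sketch follows; expm1f() stands in for the generated code, and the cutoff value is an assumed illustrative constant.

#include <math.h>

/* Reference sketch only: y = x > 0 ? x * beta : alpha * expm1(x * prescale).
 * The generated kernels replace expm1f() with the vn/vt reduction plus a
 * LUT or polynomial shown in the indexed lines. */
static float elu_scalar_reference(float x, float prescale, float alpha, float beta) {
  const float sat_cutoff = -0x1.154246p+4f;  /* assumed illustrative value */
  float z = x * prescale;                    /* vz = vx * vprescale */
  if (z <= sat_cutoff) {                     /* cf. if XNN_UNPREDICTABLE(vz <= vsat_cutoff) */
    z = sat_cutoff;                          /* the wasm variants clamp z instead of branching */
  }
  const float neg = alpha * expm1f(z);       /* negative branch, saturates to -alpha */
  const float pos = x * beta;                /* positive branch */
  return (x > 0.0f) ? pos : neg;
}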
/external/XNNPACK/src/f16-vsigmoid/gen/ |
D | vsigmoid-avx2-rr1-p2-div-x32.c |
      48  const __m256 vz3 = _mm256_or_ps(vx3, vsign_mask); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x32() local
      53  __m256 vn3 = _mm256_fmadd_ps(vz3, vlog2e, vmagic_bias); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x32()
      68  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vz3); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x32()
      98  vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vz3, vdenorm_cutoff, _CMP_LT_OS), vf3); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x32()
|
D | vsigmoid-avx2-rr1-p2-rcp-x32.c |
      48  const __m256 vz3 = _mm256_or_ps(vx3, vsign_mask); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x32() local
      53  __m256 vn3 = _mm256_fmadd_ps(vz3, vlog2e, vmagic_bias); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x32()
      68  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vz3); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x32()
     103  vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vz3, vdenorm_cutoff, _CMP_LT_OS), vf3); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x32()
|
D | vsigmoid-avx2-rr1-p2-div-x40.c |
      49  const __m256 vz3 = _mm256_or_ps(vx3, vsign_mask); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x40() local
      55  __m256 vn3 = _mm256_fmadd_ps(vz3, vlog2e, vmagic_bias); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x40()
      73  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vz3); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x40()
     109  vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vz3, vdenorm_cutoff, _CMP_LT_OS), vf3); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x40()
|
D | vsigmoid-avx2-rr1-p2-rcp-x40.c |
      49  const __m256 vz3 = _mm256_or_ps(vx3, vsign_mask); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x40() local
      55  __m256 vn3 = _mm256_fmadd_ps(vz3, vlog2e, vmagic_bias); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x40()
      73  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vz3); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x40()
     115  vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vz3, vdenorm_cutoff, _CMP_LT_OS), vf3); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x40()
|
D | vsigmoid-avx2-rr1-p2-rcp-x48.c |
      50  const __m256 vz3 = _mm256_or_ps(vx3, vsign_mask); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x48() local
      57  __m256 vn3 = _mm256_fmadd_ps(vz3, vlog2e, vmagic_bias); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x48()
      78  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vz3); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x48()
     127  vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vz3, vdenorm_cutoff, _CMP_LT_OS), vf3); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x48()
|
D | vsigmoid-avx2-rr1-p2-div-x48.c |
      50  const __m256 vz3 = _mm256_or_ps(vx3, vsign_mask); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x48() local
      57  __m256 vn3 = _mm256_fmadd_ps(vz3, vlog2e, vmagic_bias); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x48()
      78  __m256 vt3 = _mm256_fmadd_ps(vn3, vminus_ln2, vz3); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x48()
     120  vf3 = _mm256_andnot_ps(_mm256_cmp_ps(vz3, vdenorm_cutoff, _CMP_LT_OS), vf3); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x48()
|
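The f16-vsigmoid entries mirror the f32 avx2 kernels: half-precision lanes are widened to f32, the same rr1 flow indexed above (vz3, vn3, vt3, denorm cutoff) is evaluated with a low-degree polynomial, and the result is narrowed back to f16, using either a division or an rcp-based reciprocal. A sketch of that conversion boundary for one 8-lane block, assuming AVX2 + F16C, with expf() standing in for the vectorized polynomial:

#include <immintrin.h>
#include <math.h>
#include <stdint.h>

/* Illustrative sketch only: widen f16 -> f32, apply the scalar sigmoid
 * reference per lane, narrow back to f16.  The real kernels keep all eight
 * lanes in __m256 registers; here expf() underflowing to zero plays the role
 * of the explicit denorm-cutoff flush in the indexed lines. */
static void f16_sigmoid_block8_reference(const uint16_t* in, uint16_t* out) {
  const __m256 vx = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) in));
  float lanes[8];
  _mm256_storeu_ps(lanes, vx);
  for (int i = 0; i < 8; i++) {
    const float z = fabsf(lanes[i]);
    const float e = expf(-z);
    const float f = e / (e + 1.0f);               /* vf = ve / vd */
    lanes[i] = signbit(lanes[i]) ? f : 1.0f - f;  /* sign reflection, as in the f32 kernels */
  }
  const __m256 vf = _mm256_loadu_ps(lanes);
  _mm_storeu_si128((__m128i*) out,
      _mm256_cvtps_ph(vf, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
}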