Searched refs:vz4 (Results 1 – 25 of 122) sorted by relevance

/external/XNNPACK/src/f32-velu/gen/
velu-scalar-rr2-lut16-p3-x5.c
53 const float vz4 = vx4 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5() local
59 float vn4 = vz4 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
85 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
109 if XNN_UNPREDICTABLE(vz4 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
velu-scalar-rr2-p6-x5.c
53 const float vz4 = vx4 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5() local
59 float vn4 = vz4 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
76 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
100 if XNN_UNPREDICTABLE(vz4 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
velu-scalar-rr2-lut16-p3-x6.c
54 const float vz4 = vx4 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() local
61 float vn4 = vz4 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
91 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
117 if XNN_UNPREDICTABLE(vz4 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
velu-scalar-rr2-p6-x6.c
54 const float vz4 = vx4 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6() local
61 float vn4 = vz4 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
81 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
107 if XNN_UNPREDICTABLE(vz4 <= vsat_cutoff) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
velu-wasm-rr2-lut16-p3-x5.c
53 …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5() local
59 float vn4 = vz4 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
85 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
velu-wasm-rr2-p6-x5.c
53 …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x5() local
59 float vn4 = vz4 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
76 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
velu-wasm-rr2-lut16-p3-x6.c
54 …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6() local
61 float vn4 = vz4 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
91 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
velu-wasm-rr2-p6-x6.c
54 …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x6() local
61 float vn4 = vz4 * vlog2e + vmagic_bias; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
81 float vt4 = vn4 * vminus_ln2_hi + vz4; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
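All of the f32-velu matches above follow one recipe: vz4 is the prescaled input for the fifth element of the unrolled batch (the -x5/-x6 suffixes are the unroll factors), exp(vz4) is evaluated with a magic-bias range reduction against log2(e) followed by a two-constant ("rr2") subtraction of n*ln2, and vsat_cutoff catches inputs so negative that alpha*(exp(z) - 1) has already saturated. A minimal scalar sketch of that recipe, assuming typical constant values and a deliberately short polynomial (hypothetical helper, not XNNPACK's code):

#include <stdint.h>
#include <string.h>

/* Hypothetical helper: y = beta*x for x >= 0, y = alpha*(exp(prescale*x) - 1) for x < 0. */
float elu_scalar_sketch(float x, float prescale, float alpha, float beta) {
  const float log2e        = 0x1.715476p+0f;    /* log2(e) */
  const float magic_bias   = 0x1.8000FEp23f;    /* rounds n and pre-biases the exponent */
  const float minus_ln2_hi = -0x1.62E400p-1f;   /* high part of -ln(2): the "rr2" split */
  const float minus_ln2_lo = -0x1.7F7D1Cp-20f;  /* low part of -ln(2) */
  const float sat_cutoff   = -0x1.154246p+4f;   /* alpha*(exp(z) - 1) has saturated below this */

  const float z = x * prescale;                 /* the role vz4 plays in the matches above */

  float n = z * log2e + magic_bias;             /* n ~= round(z / ln2), kept in the mantissa */
  uint32_t nbits;
  memcpy(&nbits, &n, sizeof nbits);
  float s;                                      /* s = 2^round(z / ln2), built from n's low bits */
  nbits <<= 23;
  memcpy(&s, &nbits, sizeof s);
  n -= magic_bias;

  float t = n * minus_ln2_hi + z;               /* reduced argument t = z - n*ln2, in two steps */
  t = n * minus_ln2_lo + t;

  if (z <= sat_cutoff) {                        /* saturate instead of underflowing */
    s = 0.0f;
    t = 0.0f;
  }

  /* Short stand-in for the lut16-p3 / p6 polynomials: p ~= exp(t) - 1. */
  const float p = t * (1.0f + t * (0.5f + t * (1.0f / 6.0f)));
  const float e = (p * s + (s - 1.0f)) * alpha; /* alpha * (exp(z) - 1) */

  return x < 0.0f ? e : x * beta;
}

The wasm variants fold the saturation into the clamp visible in their matches (min/max against vsat_cutoff and 0.0) instead of branching on vz4 <= vsat_cutoff.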
/external/XNNPACK/src/f16-vsigmoid/gen/
vsigmoid-avx2-rr1-p2-div-x40.c
50 const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x40() local
56 __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x40()
74 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x40()
110 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x40()
vsigmoid-avx2-rr1-p2-rcp-x40.c
50 const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x40() local
56 __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x40()
74 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x40()
116 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x40()
vsigmoid-avx2-rr1-p2-rcp-x48.c
51 const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x48() local
58 __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x48()
79 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x48()
128 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x48()
vsigmoid-avx2-rr1-p2-div-x48.c
51 const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x48() local
58 __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x48()
79 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x48()
121 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x48()
vsigmoid-avx2-rr1-p2-div-x56.c
52 const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x56() local
60 __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x56()
84 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x56()
132 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x56()
vsigmoid-avx2-rr1-p2-rcp-x56.c
52 const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x56() local
60 __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x56()
84 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x56()
140 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x56()
vsigmoid-avx2-rr1-p2-div-x64.c
53 const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x64() local
62 __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x64()
89 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x64()
143 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_div_x64()
vsigmoid-avx2-rr1-p2-rcp-x64.c
53 const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x64() local
62 __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x64()
89 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x64()
152 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4); in xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x64()
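Every vsigmoid match above, f16 and f32 alike, starts the same way: vz4 = _mm256_or_ps(vx4, vsign_mask) sets the sign bit, i.e. vz4 = -|vx4|, so the exponential computed next cannot overflow; positive inputs are recovered at the end by mirroring the result. (The f16 kernels still work on __m256 vectors because the arithmetic is done in single precision.) A small self-contained demonstration of that first step, written as a standalone test program rather than XNNPACK code:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  const __m256 vsign_mask = _mm256_set1_ps(-0.0f);   /* only the sign bit set */
  const __m256 vx = _mm256_setr_ps(-3.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 3.0f, 10.0f);
  const __m256 vz = _mm256_or_ps(vx, vsign_mask);    /* lane-wise vz = -|vx| */

  float z[8];
  _mm256_storeu_ps(z, vz);
  for (int i = 0; i < 8; i++) {
    printf("%g ", z[i]);                             /* prints -3 -1 -0.5 -0 -0.5 -1 -3 -10 */
  }
  printf("\n");
  return 0;
}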
/external/XNNPACK/src/f32-vsigmoid/gen/
vsigmoid-avx2-rr1-p5-div-x40.c
50 const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40() local
56 __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40()
74 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40()
128 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40()
vsigmoid-avx2-rr1-p5-nr1fma-x40.c
50 const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x40() local
56 __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x40()
74 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x40()
141 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x40()
vsigmoid-avx2-rr1-p5-div-x48.c
51 const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x48() local
58 __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x48()
79 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x48()
142 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x48()
vsigmoid-avx2-rr1-p5-nr2fma-x40.c
50 const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x40() local
56 __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
74 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
146 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
vsigmoid-avx512f-rr1-p5-scalef-div-x80.c
49 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80() local
55 __m512 vn4 = _mm512_mul_ps(vz4, vlog2e); in xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80()
67 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f32_vsigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80()
vsigmoid-avx512f-rr1-lut16-p3-perm-scalef-div-x80.c
48 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80() local
54 __m512 vn4 = _mm512_fmadd_ps(vz4, vlog2e, vmagic_bias); in xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80()
72 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f32_vsigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80()
vsigmoid-avx512f-rr2-lut32-p2-perm2-scalef-div-x80.c
50 const __m512 vz4 = _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vx4), vsign_mask)); in xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() local
56 __m512 vn4 = _mm512_fmadd_ps(vz4, vlog2e, vmagic_bias); in xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80()
74 __m512 vt4 = _mm512_fmadd_ps(vn4, vminus_ln2_hi, vz4); in xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80()
vsigmoid-avx2-rr1-p5-nr1fma-x48.c
51 const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x48() local
58 __m256 vn4 = _mm256_fmadd_ps(vz4, vlog2e, vmagic_bias); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x48()
79 __m256 vt4 = _mm256_fmadd_ps(vn4, vminus_ln2, vz4); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x48()
157 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4); in xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_nr1fma_x48()
vsigmoid-avx-rr2-p5-div-x40.c
51 const __m256 vz4 = _mm256_or_ps(vx4, vsign_mask); in xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x40() local
57 __m256 vn4 = _mm256_add_ps(_mm256_mul_ps(vz4, vlog2e), vmagic_bias); in xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x40()
85 __m256 vt4 = _mm256_add_ps(_mm256_mul_ps(vn4, vminus_ln2_hi), vz4); in xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x40()
145 vf4 = _mm256_andnot_ps(_mm256_cmp_ps(vz4, vdenorm_cutoff, _CMP_LT_OS), vf4); in xnn_f32_vsigmoid_ukernel__avx_rr2_p5_div_x40()
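Past that first step the sigmoid kernels share one recipe: vn4 = fmadd(vz4, vlog2e, vmagic_bias) buries round(z/ln2) in the mantissa, vt4 = fmadd(vn4, vminus_ln2, vz4) forms the reduced argument (split into _hi/_lo constants in the rr2 variants), a short polynomial approximates exp(t), e/(e + 1) is formed by a true division (div), a reciprocal estimate (rcp), or one or two Newton-Raphson FMA steps (nr1fma/nr2fma), and the final _mm256_andnot_ps against the vz4 < vdenorm_cutoff comparison flushes results that would underflow. A minimal scalar sketch, assuming typical constant values and an illustrative degree-2 polynomial (hypothetical helper, not XNNPACK code):

#include <stdint.h>
#include <string.h>

/* Hypothetical helper: sigmoid(x) = 1 / (1 + exp(-x)), computed the way the kernels above do. */
float sigmoid_scalar_sketch(float x) {
  const float log2e         = 0x1.715476p+0f;   /* log2(e) */
  const float magic_bias    = 0x1.8000FEp23f;   /* rounds n and pre-biases the exponent */
  const float minus_ln2     = -0x1.62E430p-1f;  /* one constant: the "rr1" reduction */
  const float denorm_cutoff = -0x1.5D589Ep+6f;  /* exp(z) goes subnormal below this */

  /* z = -|x|, the scalar analogue of vz4 = _mm256_or_ps(vx4, vsign_mask). */
  uint32_t xbits;
  memcpy(&xbits, &x, sizeof xbits);
  xbits |= UINT32_C(0x80000000);
  float z;
  memcpy(&z, &xbits, sizeof z);

  float n = z * log2e + magic_bias;             /* n ~= round(z / ln2), kept in the mantissa */
  uint32_t nbits;
  memcpy(&nbits, &n, sizeof nbits);
  float s;                                      /* s = 2^round(z / ln2), built from n's low bits */
  nbits <<= 23;
  memcpy(&s, &nbits, sizeof s);
  n -= magic_bias;

  const float t = n * minus_ln2 + z;            /* reduced argument t = z - n*ln2 */
  const float p = t * (1.0f + 0.5f * t);        /* stand-in for the p2/p5 polynomials: ~ exp(t) - 1 */
  const float e = s + s * p;                    /* ~ exp(z) */
  float f = e / (e + 1.0f);                     /* the "div" variants; rcp/nr?fma refine a reciprocal */

  if (z < denorm_cutoff) {
    f = 0.0f;                                   /* the _mm256_andnot_ps flush in the matches above */
  }
  return x > 0.0f ? 1.0f - f : f;               /* undo the z = -|x| mirroring */
}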
