
Searched refs:vn5 (results 1–25 of 178), sorted by relevance. Each entry lists the file, the enclosing function, and the matching source lines; [local] marks the local definition of vn5.


/external/XNNPACK/src/f32-velu/gen/
velu-scalar-rr2-lut16-p3-x6.c  (in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6)
    64  float vn5 = vz5 * vlog2e + vmagic_bias;  [local]
    81  const uint32_t ven5 = fp32_to_bits(vn5) << 19;
    82  const uint32_t vidx5 = fp32_to_bits(vn5) & vindex_mask;
    83  vn5 -= vmagic_bias;
    95  float vt5 = vn5 * vminus_ln2_hi + vz5;
   123  vt5 = vn5 * vminus_ln2_lo + vt5;
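Note: the six hits above are the core of the magic-bias range reduction. Adding vmagic_bias rounds vz5*log2(e) to the nearest multiple of 1/16 and parks the result in the low mantissa bits of vn5, so one float yields both the 4-bit LUT index (the & vindex_mask hit) and the exponent bits of 2^n (the << 19 hit), while the two vminus_ln2 steps form a Cody-Waite evaluation of the reduced argument t = z - n*ln(2). A minimal scalar sketch of the pattern; the constants and the final polynomial are assumptions consistent with the shifts and masks above, not values copied from the kernel:

    #include <stdint.h>
    #include <string.h>

    /* Bit-cast helpers in the style of XNNPACK's fp32_to_bits/fp32_from_bits. */
    static inline uint32_t fp32_to_bits(float f) { uint32_t u; memcpy(&u, &f, sizeof u); return u; }
    static inline float fp32_from_bits(uint32_t u) { float f; memcpy(&f, &u, sizeof f); return f; }

    float exp_rr2_lut16_sketch(float vz, const uint32_t vtable[16]) {
      const float vmagic_bias    = 0x1.800000p19f;   /* assumed: 1.5 * 2^19, ulp = 1/16 */
      const float vlog2e         = 0x1.715476p+0f;   /* log2(e) */
      const float vminus_ln2_hi  = -0x1.62E400p-1f;  /* assumed hi/lo split of -ln(2) */
      const float vminus_ln2_lo  = -0x1.7F7D1Cp-20f;
      const uint32_t vindex_mask = UINT32_C(0xF);

      float vn = vz * vlog2e + vmagic_bias;          /* round z*log2(e) to 1/16 */
      const uint32_t ven  = fp32_to_bits(vn) << 19;  /* integer part -> exponent field */
      const uint32_t vidx = fp32_to_bits(vn) & vindex_mask;  /* low 4 bits -> LUT index */
      vn -= vmagic_bias;                             /* recover n as a float */

      const float vs = fp32_from_bits(vtable[vidx] + ven);   /* vs = 2^n */
      float vt = vn * vminus_ln2_hi + vz;            /* Cody-Waite: t = z - n*ln(2) */
      vt = vn * vminus_ln2_lo + vt;
      return vs * (1.0f + vt);   /* one-term stand-in for the kernel's degree-3 polynomial */
    }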
velu-wasm-rr2-lut16-p3-x6.c  (in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6)
    64  float vn5 = vz5 * vlog2e + vmagic_bias;  [local]
    81  const uint32_t ven5 = fp32_to_bits(vn5) << 19;
    82  const uint32_t vidx5 = fp32_to_bits(vn5) & vindex_mask;
    83  vn5 -= vmagic_bias;
    95  float vt5 = vn5 * vminus_ln2_hi + vz5;
   103  vt5 = vn5 * vminus_ln2_lo + vt5;
velu-wasm-rr2-p6-x6.c  (in xnn_f32_velu_ukernel__wasm_rr2_p6_x6)
    64  float vn5 = vz5 * vlog2e + vmagic_bias;  [local]
    76  float vs5 = fp32_from_bits(fp32_to_bits(vn5) << 23);
    77  vn5 -= vmagic_bias;
    84  float vt5 = vn5 * vminus_ln2_hi + vz5;
    91  vt5 = vn5 * vminus_ln2_lo + vt5;
velu-scalar-rr2-p6-x6.c  (in xnn_f32_velu_ukernel__scalar_rr2_p6_x6)
    64  float vn5 = vz5 * vlog2e + vmagic_bias;  [local]
    76  float vs5 = fp32_from_bits(fp32_to_bits(vn5) << 23);
    77  vn5 -= vmagic_bias;
    84  float vt5 = vn5 * vminus_ln2_hi + vz5;
    91  vt5 = vn5 * vminus_ln2_lo + vt5;
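Note: the two p6 files are the LUT-free variant: vn5 is rounded to a whole integer (the magic bias sits near 1.5 * 2^23, so the ulp is 1), shifting the raw bits left by 23 moves n straight into the exponent field, and a degree-6 polynomial covers the wider reduced range. A hedged sketch reusing the bit-cast helpers from the previous sketch (constants assumed, polynomial again a stand-in):

    float exp_rr2_p6_sketch(float vz) {
      const float vmagic_bias   = 0x1.800000p23f;   /* assumed: 1.5 * 2^23, ulp = 1 */
      const float vlog2e        = 0x1.715476p+0f;
      const float vminus_ln2_hi = -0x1.62E400p-1f;  /* assumed hi/lo split of -ln(2) */
      const float vminus_ln2_lo = -0x1.7F7D1Cp-20f;

      float vn = vz * vlog2e + vmagic_bias;                     /* n = round(z * log2(e)) */
      const float vs = fp32_from_bits(fp32_to_bits(vn) << 23);  /* vs = 2^n, no table */
      vn -= vmagic_bias;
      float vt = vn * vminus_ln2_hi + vz;
      vt = vn * vminus_ln2_lo + vt;
      return vs * (1.0f + vt);   /* stand-in for the kernel's degree-6 polynomial */
    }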
velu-avx2-rr1-lut16-p3-gather-x48.c  (in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48)
    63  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);  [local]
    75  const __m256i vidx5 = _mm256_and_si256(_mm256_castps_si256(vn5), vindex_mask);
    88  const __m256i ven5 = _mm256_slli_epi32(_mm256_castps_si256(vn5), 19);
    89  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
   102  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
velu-avx512f-rr1-lut16-p3-perm-x96.c  (in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96)
    63  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);  [local]
    75  const __m512i ven5 = _mm512_slli_epi32(_mm512_castps_si512(vn5), 19);
    76  const __m512i vl5 = _mm512_permutexvar_epi32(_mm512_castps_si512(vn5), vtable);
    89  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
    96  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vz5);
velu-avx2-rr1-lut8-p4-perm-x48.c  (in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48)
    63  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);  [local]
    80  const __m256i ven5 = _mm256_slli_epi32(_mm256_castps_si256(vn5), 20);
    81  const __m256i vl5 = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn5));
    82  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    95  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
velu-avx2-rr1-lut4-p4-perm-x48.c  (in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48)
    64  __m256 vn5 = _mm256_fmadd_ps(vz5, vlog2e, vmagic_bias);  [local]
    81  const __m256i ven5 = _mm256_slli_epi32(_mm256_castps_si256(vn5), 21);
    82  const __m256i vl5 = _mm256_castps_si256(_mm256_permutevar_ps(vtable, _mm256_castps_si256(vn5)));
    83  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    96  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2, vz5);
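Note: across the three AVX2 perm kernels the shift count tracks the table size: 19 bits for 16 entries, 20 for 8, 21 for 4, i.e. shift = 23 - log2(entries): the low log2(entries) bits of the biased vn5 pick the table entry and everything above becomes exponent. The lut4 file can use the cheaper in-lane _mm256_permutevar_ps because a 4-entry table fits in each 128-bit lane. A sketch of the lut8 reconstruction (the add-based recombination and names are assumptions patterned on the hits):

    #include <immintrin.h>

    /* vn_biased: z*log2(e) + magic bias; vtable: 8 mantissa patterns for 2^(i/8). */
    static __m256 scale_from_lut8(__m256 vn_biased, __m256i vtable) {
      /* bits above the 3-bit index become the exponent: shift = 23 - 3 = 20 */
      const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn_biased), 20);
      /* each lane's low 3 bits select one of the 8 table entries */
      const __m256i vl = _mm256_permutevar8x32_epi32(vtable, _mm256_castps_si256(vn_biased));
      /* table bits plus exponent bits reassemble vs = 2^n */
      return _mm256_castsi256_ps(_mm256_add_epi32(vl, ven));
    }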
/external/XNNPACK/src/f32-sigmoid/gen/
avx512f-rr2-lut32-p2-perm2-scalef-div-x96.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96)
    67  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);  [local]
    74  const __m512 vl5 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn5), vtable_hi);
    81  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
    88  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vz5);
    95  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
   123  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
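Note: the scalef kernels skip the bit-shift reconstruction of 2^n entirely: _mm512_permutex2var_ps indexes a 32-entry table spread across two registers by the low 5 bits of the biased vn5, and _mm512_scalef_ps(vp5, vn5) computes vp5 * 2^floor(vn5) at the end. A simplified sketch; in the real kernels the table value is folded into the polynomial rather than multiplied at the end:

    #include <immintrin.h>

    static __m512 scalef_lookup_sketch(__m512 vn_biased, __m512 vp,
                                       __m512 vtable_lo, __m512 vtable_hi,
                                       __m512 vmagic_bias) {
      /* low 5 bits of each lane select one of 32 entries across two registers */
      const __m512 vl = _mm512_permutex2var_ps(
          vtable_lo, _mm512_castps_si512(vn_biased), vtable_hi);
      const __m512 vn = _mm512_sub_ps(vn_biased, vmagic_bias);
      /* scalef applies the integer exponent without any bit surgery */
      return _mm512_scalef_ps(_mm512_mul_ps(vl, vp), vn);  /* (vl*vp) * 2^floor(vn) */
    }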
avx512f-rr2-lut32-p2-perm2-scalef-div-x112.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112)
    69  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);  [local]
    77  const __m512 vl5 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn5), vtable_hi);
    85  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
    93  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vz5);
   101  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
   133  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x96.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96)
    67  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);  [local]
    74  const __m512 vl5 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn5), vtable_hi);
    81  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
    88  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vz5);
    95  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
   123  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
avx512f-rr1-lut16-p3-perm-scalef-div-x96.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96)
    61  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);  [local]
    68  const __m512 vl5 = _mm512_permutexvar_ps(_mm512_castps_si512(vn5), vtable);
    75  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
    82  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vz5);
   117  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112)
    69  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);  [local]
    77  const __m512 vl5 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn5), vtable_hi);
    85  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
    93  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vz5);
   101  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
   133  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
avx512f-rr2-lut32-p2-perm2-scalef-div-x128.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128)
    71  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);  [local]
    80  const __m512 vl5 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn5), vtable_hi);
    89  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
    98  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vz5);
   107  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
   143  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
avx512f-rr1-lut16-p3-perm-scalef-div-x112.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112)
    63  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);  [local]
    71  const __m512 vl5 = _mm512_permutexvar_ps(_mm512_castps_si512(vn5), vtable);
    79  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
    87  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vz5);
   127  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x96.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96)
    61  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);  [local]
    68  const __m512 vl5 = _mm512_permutexvar_ps(_mm512_castps_si512(vn5), vtable);
    75  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
    82  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vz5);
   117  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x128.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128)
    71  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);  [local]
    80  const __m512 vl5 = _mm512_permutex2var_ps(vtable_lo, _mm512_castps_si512(vn5), vtable_hi);
    89  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
    98  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_hi, vz5);
   107  vt5 = _mm512_fmadd_ps(vn5, vminus_ln2_lo, vt5);
   143  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
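Note: the div/nr1fma split in these file names is the final sigmoid division 1/(1 + e^-z). My reading, from the names alone, is that "div" issues a true divide while "nr1fma" refines a hardware reciprocal estimate with one FMA-based Newton-Raphson step, roughly:

    #include <immintrin.h>

    /* Hypothetical nr1fma-style reciprocal: one Newton-Raphson step on rcp14. */
    static __m512 recip_nr1fma_sketch(__m512 vd) {
      const __m512 vr = _mm512_rcp14_ps(vd);             /* ~14-bit estimate of 1/d */
      const __m512 ve = _mm512_fnmadd_ps(vd, vr, _mm512_set1_ps(1.0f));  /* e = 1 - d*r */
      return _mm512_fmadd_ps(vr, ve, vr);                /* r' = r + r*e */
    }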
avx-rr2-p5-div-x48.c  (in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48)
    62  __m256 vn5 = _mm256_add_ps(_mm256_mul_ps(vz5, vlog2e), vmagic_bias);  [local]
    79  …_m128 vs_lo5 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn5)), 23));
    80  …128 vs_hi5 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn5, 1)), 23));
    88  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    95  __m256 vt5 = _mm256_add_ps(_mm256_mul_ps(vn5, vminus_ln2_hi), vz5);
   102  vt5 = _mm256_add_ps(_mm256_mul_ps(vn5, vminus_ln2_lo), vt5);
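Note: this is the lone AVX(1) kernel in the list. 256-bit integer shifts need AVX2, so the truncated hits at lines 79-80 shift each 128-bit half of vn5 separately with the SSE2 intrinsic and glue the halves back together. A hypothetical reconstruction of that split (the recombination step is my assumption; the hits above are cut off):

    #include <immintrin.h>

    static __m256 exp2_scale_avx_sketch(__m256 vn) {
      const __m128 vs_lo = _mm_castsi128_ps(
          _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 23));
      const __m128 vs_hi = _mm_castsi128_ps(
          _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 23));
      return _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1);
    }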
avx512f-rr1-p5-scalef-div-x96.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96)
    58  __m512 vn5 = _mm512_mul_ps(vz5, vlog2e);  [local]
    65  vn5 = _mm512_roundscale_ps(vn5, 0);
    72  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vz5);
   114  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
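Note: with AVX-512 the magic bias is unnecessary: _mm512_roundscale_ps(vn5, 0) rounds to the nearest integer directly, and scalef applies the result as an exponent, which is why this file has no vmagic_bias hits. A compact sketch (constants assumed, degree-5 polynomial replaced by a stand-in):

    #include <immintrin.h>

    static __m512 exp_rr1_p5_scalef_sketch(__m512 vz) {
      const __m512 vlog2e     = _mm512_set1_ps(0x1.715476p+0f);
      const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E430p-1f);  /* single-term -ln(2) */
      __m512 vn = _mm512_mul_ps(vz, vlog2e);
      vn = _mm512_roundscale_ps(vn, 0);                       /* n = round-to-nearest integer */
      const __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vz);  /* t = z - n*ln(2) */
      const __m512 vp = _mm512_add_ps(vt, _mm512_set1_ps(1.0f));  /* stand-in for p5(t) */
      return _mm512_scalef_ps(vp, vn);                        /* p * 2^n */
    }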
avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c  (in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112)
    63  __m512 vn5 = _mm512_fmadd_ps(vz5, vlog2e, vmagic_bias);  [local]
    71  const __m512 vl5 = _mm512_permutexvar_ps(_mm512_castps_si512(vn5), vtable);
    79  vn5 = _mm512_sub_ps(vn5, vmagic_bias);
    87  __m512 vt5 = _mm512_fmadd_ps(vn5, vminus_ln2, vz5);
   127  const __m512 ve5 = _mm512_scalef_ps(vp5, vn5);
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
avx2-p5-x48.c  (in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x48)
    69  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
    78  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
    86  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
    95  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
   102  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
avx2-p5-x56.c  (in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56)
    71  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
    81  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
    90  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
   100  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
   108  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
avx2-p5-x64.c  (in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64)
    73  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
    84  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
    94  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
   105  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
   114  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
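Note: the vscaleexpminusmax hits are the same p5 exp core applied to vx5 (the input already shifted by the row maximum); the kernel name suggests the contract y[i] = scale * exp(x[i] - max), i.e. a scaled softmax numerator. A scalar reference of that contract as I read it (hypothetical, for orientation only):

    #include <math.h>
    #include <stddef.h>

    static void vscaleexpminusmax_ref(size_t n, const float* x, float* y,
                                      float scale, float max_x) {
      for (size_t i = 0; i < n; i++) {
        y[i] = scale * expf(x[i] - max_x);  /* exp of max-shifted input, then scale */
      }
    }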
/external/XNNPACK/src/f32-raddexpminusmax/gen/
avx2-p5-x64-acc2.c  (in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2)
    72  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
    83  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
    93  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
   104  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
   113  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
avx2-p5-x64.c  (in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64)
    72  __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);  [local]
    83  const __m256 vs5 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn5), 23));
    93  vn5 = _mm256_sub_ps(vn5, vmagic_bias);
   104  __m256 vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_hi, vx5);
   113  vt5 = _mm256_fmadd_ps(vn5, vminus_ln2_lo, vt5);
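Note: by the same reading, raddexpminusmax reduces the sum of exp(x[i] - max) and raddstoreexpminusmax also stores each exponential: together the standard two-pass softmax pattern. The acc2 suffix presumably keeps two partial accumulators to shorten the floating-point add dependency chain. A hypothetical scalar reference for the raddstore contract:

    #include <math.h>
    #include <stddef.h>

    static float raddstoreexpminusmax_ref(size_t n, const float* x, float* y,
                                          float max_x) {
      float sum = 0.0f;
      for (size_t i = 0; i < n; i++) {
        y[i] = expf(x[i] - max_x);  /* "store": write each exponential */
        sum += y[i];                /* "radd": reduce their sum */
      }
      return sum;
    }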
