Home
last modified time | relevance | path

Searched refs:vx6 (Results 1 – 25 of 152) sorted by relevance

1234567

/external/XNNPACK/src/f32-vsqrt/gen/
Davx512f-nr1fma1adj-x112.c36 const __m512 vx6 = _mm512_loadu_ps(x + 96); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() local
45 const __m512 vrsqrtx6 = _mm512_rsqrt14_ps(vx6); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
59 __m512 vsqrtx6 = _mm512_mul_ps(vrsqrtx6, vx6); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
91 const __m512 vadjustment6 = _mm512_fnmadd_ps(vsqrtx6, vsqrtx6, vx6); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
Dfma3-nr1fma1adj-x56.c37 const __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() local
46 const __m256 vrsqrtx6 = _mm256_rsqrt_ps(vx6); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
60 __m256 vsqrtx6 = _mm256_mul_ps(vrsqrtx6, vx6); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
92 const __m256 vadjustment6 = _mm256_fnmadd_ps(vsqrtx6, vsqrtx6, vx6); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
Davx512f-nr1fma1adj-x128.c36 const __m512 vx6 = _mm512_loadu_ps(x + 96); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() local
46 const __m512 vrsqrtx6 = _mm512_rsqrt14_ps(vx6); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
61 __m512 vsqrtx6 = _mm512_mul_ps(vrsqrtx6, vx6); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
98 const __m512 vadjustment6 = _mm512_fnmadd_ps(vsqrtx6, vsqrtx6, vx6); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
Dfma3-nr1fma1adj-x64.c37 const __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() local
47 const __m256 vrsqrtx6 = _mm256_rsqrt_ps(vx6); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
62 __m256 vsqrtx6 = _mm256_mul_ps(vrsqrtx6, vx6); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
99 const __m256 vadjustment6 = _mm256_fnmadd_ps(vsqrtx6, vsqrtx6, vx6); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
/external/XNNPACK/src/f32-velu/gen/
Dvelu-avx2-rr1-lut4-p4-perm-x56.c50 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56() local
59 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56()
165 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56()
173 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56()
Dvelu-avx2-rr1-lut8-p4-perm-x56.c49 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56() local
58 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
164 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
172 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
Dvelu-avx2-rr1-lut16-p3-gather-x56.c49 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56() local
58 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
164 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
172 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
Dvelu-avx2-rr1-p6-x56.c49 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_p6_x56() local
58 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x56()
166 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_p6_x56()
174 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_p6_x56()
Dvelu-avx2-rr1-p6-x64.c49 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_p6_x64() local
59 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x64()
180 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_p6_x64()
190 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_p6_x64()
Dvelu-avx2-rr1-lut4-p4-perm-x64.c50 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64() local
60 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64()
179 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64()
189 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64()
Dvelu-avx2-rr1-lut8-p4-perm-x64.c49 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64() local
59 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
178 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
188 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
Dvelu-avx2-rr1-lut16-p3-gather-x64.c49 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64() local
59 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
178 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
188 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
Dvelu-avx2-rr1-p6-x72.c49 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72() local
60 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
194 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
206 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
Dvelu-avx2-rr1-lut4-p4-perm-x72.c50 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72() local
61 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
193 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
205 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
Dvelu-avx2-rr1-lut16-p3-gather-x72.c49 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() local
60 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
192 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
204 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
Dvelu-avx2-rr1-lut8-p4-perm-x72.c49 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72() local
60 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
192 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
204 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
Dvelu-avx2-rr1-lut8-p4-perm-x80.c49 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80() local
61 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
206 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
220 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
Dvelu-avx2-rr1-p6-x80.c49 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_p6_x80() local
61 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x80()
208 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_p6_x80()
222 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_p6_x80()
Dvelu-avx2-rr1-lut4-p4-perm-x80.c50 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80() local
62 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80()
207 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80()
221 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80()
Dvelu-avx2-rr1-lut16-p3-gather-x80.c49 __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80() local
61 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
206 vx6 = _mm256_mul_ps(vx6, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
220 const __m256 vy6 = _mm256_blendv_ps(vx6, ve6, vx6); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
Dvelu-avx512f-rr1-lut16-p3-perm-x112.c49 __m512 vx6 = _mm512_loadu_ps(x + 96); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112() local
58 const __m512 vz6 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
159 const __mmask16 vsign6 = _mm512_cmp_ps_mask(vx6, vzero, _CMP_NLT_US); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
167 vy6 = _mm512_mask_mul_ps(vy6, vsign6, vx6, vbeta); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
Davx2-p5-x56.c63 const __m256 vx6 = _mm256_sub_ps(vi6, vi_max); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56() local
72 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
101 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
172 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56()
Davx2-p5-x64.c64 const __m256 vx6 = _mm256_sub_ps(vi6, vi_max); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64() local
74 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
106 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
185 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64()
/external/XNNPACK/src/f32-raddexpminusmax/gen/
Davx2-p5-x64-acc2.c63 const __m256 vx6 = _mm256_sub_ps(vi6, vi_max); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2() local
73 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
105 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
184 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2()
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Davx2-p5-x64.c63 const __m256 vx6 = _mm256_sub_ps(vi6, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64() local
73 __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64()
105 __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64()
184 vf6 = _mm256_andnot_ps(_mm256_cmp_ps(vx6, vdenorm_cutoff, _CMP_LT_OS), vf6); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64()

1234567