
Searched refs:vx5 (Results 1 – 25 of 241) sorted by relevance

/external/XNNPACK/src/f32-vsqrt/gen/
avx512f-nr1fma1adj-x96.c
35 const __m512 vx5 = _mm512_loadu_ps(x + 80); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() local
43 const __m512 vrsqrtx5 = _mm512_rsqrt14_ps(vx5); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
55 __m512 vsqrtx5 = _mm512_mul_ps(vrsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
83 const __m512 vadjustment5 = _mm512_fnmadd_ps(vsqrtx5, vsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
fma3-nr1fma1adj-x48.c
34 const __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() local
42 const __m256 vrsqrtx5 = _mm256_rsqrt_ps(vx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
54 __m256 vsqrtx5 = _mm256_mul_ps(vrsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
82 const __m256 vadjustment5 = _mm256_fnmadd_ps(vsqrtx5, vsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
avx512f-nr1fma1adj-x112.c
35 const __m512 vx5 = _mm512_loadu_ps(x + 80); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() local
44 const __m512 vrsqrtx5 = _mm512_rsqrt14_ps(vx5); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
57 __m512 vsqrtx5 = _mm512_mul_ps(vrsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
90 const __m512 vadjustment5 = _mm512_fnmadd_ps(vsqrtx5, vsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
fma3-nr1fma1adj-x56.c
34 const __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() local
43 const __m256 vrsqrtx5 = _mm256_rsqrt_ps(vx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
56 __m256 vsqrtx5 = _mm256_mul_ps(vrsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
89 const __m256 vadjustment5 = _mm256_fnmadd_ps(vsqrtx5, vsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
avx512f-nr1fma1adj-x128.c
35 const __m512 vx5 = _mm512_loadu_ps(x + 80); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() local
45 const __m512 vrsqrtx5 = _mm512_rsqrt14_ps(vx5); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
59 __m512 vsqrtx5 = _mm512_mul_ps(vrsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
97 const __m512 vadjustment5 = _mm512_fnmadd_ps(vsqrtx5, vsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
fma3-nr1fma1adj-x64.c
34 const __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() local
44 const __m256 vrsqrtx5 = _mm256_rsqrt_ps(vx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
58 __m256 vsqrtx5 = _mm256_mul_ps(vrsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
96 const __m256 vadjustment5 = _mm256_fnmadd_ps(vsqrtx5, vsqrtx5, vx5); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
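
Note: the f32-vsqrt hits above all follow the same "nr1fma1adj" scheme per lane: a coarse hardware reciprocal-square-root estimate (_mm512_rsqrt14_ps / _mm256_rsqrt_ps), one Newton-Raphson step expressed with FMAs, and a final adjustment built from vadjustment = vx - vsqrtx*vsqrtx (the fnmadd lines). A minimal scalar C sketch of that refinement, where rsqrt_estimate() is a crude stand-in for the hardware approximation:

#include <math.h>
#include <stdint.h>

/* Stand-in for the ~12-14 bit hardware reciprocal-sqrt estimate. */
static inline float rsqrt_estimate(float x) {
  union { float f; uint32_t u; } v = { x };
  v.u = 0x5F375A86u - (v.u >> 1);  /* classic bit-level rsqrt approximation */
  return v.f;
}

/* One Newton-Raphson step plus one adjustment step ("nr1fma1adj"), scalar form. */
float sqrt_nr1fma1adj(float vx) {
  const float vrsqrtx = rsqrt_estimate(vx);
  float vsqrtx = vrsqrtx * vx;                               /* initial sqrt(x) estimate */
  float vhalfrsqrtx = vrsqrtx * 0.5f;
  const float vresidual = fmaf(-vsqrtx, vhalfrsqrtx, 0.5f);  /* 0.5 - vsqrtx*vhalfrsqrtx */
  vhalfrsqrtx = fmaf(vhalfrsqrtx, vresidual, vhalfrsqrtx);   /* refine 0.5/sqrt(x) */
  vsqrtx = fmaf(vsqrtx, vresidual, vsqrtx);                  /* refine sqrt(x): the NR step */
  const float vadjustment = fmaf(-vsqrtx, vsqrtx, vx);       /* vx - vsqrtx^2, cf. the fnmadd lines */
  return fmaf(vhalfrsqrtx, vadjustment, vsqrtx);             /* final adjusted result */
}

The unrolled vector kernels apply the same recipe to several registers at once (vx0 ... vx5, ...), which is why vx5 shows up at the load, the rsqrt, the multiply and the adjustment lines of each file.
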
/external/XNNPACK/src/f32-velu/gen/
velu-avx2-rr1-lut8-p4-perm-x48.c
44 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48() local
52 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
144 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
151 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
velu-avx2-rr1-lut16-p3-gather-x48.c
45 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48() local
53 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
145 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
152 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
velu-avx2-rr1-lut4-p4-perm-x48.c
44 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48() local
52 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48()
144 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48()
151 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48()
velu-avx2-rr1-p6-x48.c
45 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx2_rr1_p6_x48() local
53 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x48()
147 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_p6_x48()
154 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx2_rr1_p6_x48()
velu-scalar-rr2-lut16-p3-x6.c
47 float vx5 = x[5]; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() local
55 const float vz5 = vx5 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
172 float vy5 = vx5 * vbeta; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
189 if XNN_UNPREDICTABLE(vx5 < 0.0f) { in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
velu-avx2-rr1-lut16-p3-gather-x56.c
45 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56() local
54 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
159 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
168 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
velu-avx2-rr1-p6-x56.c
45 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx2_rr1_p6_x56() local
54 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x56()
161 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_p6_x56()
170 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx2_rr1_p6_x56()
velu-avx2-rr1-lut8-p4-perm-x56.c
44 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56() local
53 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
158 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
167 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()
velu-avx2-rr1-lut4-p4-perm-x56.c
44 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56() local
53 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56()
158 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56()
167 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56()
velu-scalar-rr2-p6-x6.c
47 float vx5 = x[5]; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6() local
55 const float vz5 = vx5 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
182 float vy5 = vx5 * vbeta; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
199 if XNN_UNPREDICTABLE(vx5 < 0.0f) { in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
velu-avx2-rr1-lut8-p4-perm-x64.c
44 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64() local
54 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
172 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
183 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64()
velu-avx2-rr1-lut16-p3-gather-x64.c
45 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64() local
55 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
173 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
184 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
velu-avx2-rr1-p6-x64.c
45 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx2_rr1_p6_x64() local
55 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x64()
175 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_p6_x64()
186 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx2_rr1_p6_x64()
velu-avx2-rr1-p6-x72.c
45 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72() local
56 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
189 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
202 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
velu-avx-rr2-lut4-p4-perm-x48.c
47 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48() local
55 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
174 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
181 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
velu-avx2-rr1-lut16-p3-gather-x72.c
45 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() local
56 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
187 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
200 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
velu-avx2-rr1-lut4-p4-perm-x64.c
44 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64() local
54 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64()
172 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64()
183 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64()
velu-avx2-rr1-lut4-p4-perm-x72.c
44 __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72() local
55 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
186 vx5 = _mm256_mul_ps(vx5, vbeta); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
199 const __m256 vy5 = _mm256_blendv_ps(vx5, ve5, vx5); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
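
Note: the f32-velu hits above share one per-lane recipe: clamp vz = vx*prescale at a saturation cutoff, evaluate alpha*(exp(vz) - 1) for the negative branch (the lut4/lut8/lut16/p6 variants named in the file names differ only in how that exponential is approximated), scale the positive branch by beta, and select between the two on the sign of vx (the _mm256_blendv_ps lines, or the vx5 < 0.0f test in the scalar kernels). A rough scalar sketch, with expm1f() standing in for the kernel-specific approximation and an illustrative cutoff constant:

#include <math.h>

/* Scalar sketch of the extended ELU evaluation; prescale, alpha, beta are ukernel parameters. */
float elu_sketch(float vx, float prescale, float alpha, float beta) {
  const float sat_cutoff = -17.328680f;              /* illustrative (~ln(2^-25)); real kernels load theirs from params */
  const float vz = fmaxf(sat_cutoff, vx * prescale); /* cf. vz5 = max(vsat_cutoff, vx5 * vprescale) */
  const float ve = alpha * expm1f(vz);               /* negative branch: alpha * (exp(z) - 1) */
  const float vp = vx * beta;                        /* positive branch: vx = vx * vbeta */
  return (vx < 0.0f) ? ve : vp;                      /* cf. vy5 = blendv(vx5, ve5, vx5) */
}
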
/external/XNNPACK/src/x8-lut/gen/
lut-scalar-x8.c
32 const size_t vx5 = (size_t) x[5]; in xnn_x8_lut_ukernel__scalar_x8() local
42 const uint32_t vt5 = (uint32_t) t[vx5]; in xnn_x8_lut_ukernel__scalar_x8()
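
Note: the x8-lut hit above is a plain byte-indexed table lookup (each input byte selects an entry of a 256-byte table t), unrolled by 8 in the real kernel. A minimal scalar sketch of the same operation:

#include <stddef.h>
#include <stdint.h>

/* Apply the 256-entry lookup table t to n input bytes. */
void x8_lut_sketch(size_t n, const uint8_t* x, uint8_t* y, const uint8_t t[256]) {
  for (size_t i = 0; i < n; i++) {
    const size_t vx = (size_t) x[i];   /* widen the byte to an index, cf. vx5 = (size_t) x[5] */
    y[i] = (uint8_t) t[vx];            /* table load, cf. vt5 = (uint32_t) t[vx5] */
  }
}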
