Home
last modified time | relevance | path

Searched refs:ven_hi (Results 1 – 17 of 17) sorted by relevance

/external/XNNPACK/src/math/
Dexp-avx-rr2-p5.c63 __m128i ven_hi = _mm_max_epi16(veo_hi, vmin_exponent); in xnn_math_f32_exp__avx_rr2_p5() local
65 ven_hi = _mm_min_epi16(ven_hi, vmax_exponent); in xnn_math_f32_exp__avx_rr2_p5()
67 veo_hi = _mm_sub_epi32(veo_hi, ven_hi); in xnn_math_f32_exp__avx_rr2_p5()
69 const __m128 vsn_hi = _mm_castsi128_ps(_mm_add_epi32(ven_hi, vdefault_exponent)); in xnn_math_f32_exp__avx_rr2_p5()
Dexpm1minus-avx-rr2-lut4-p4-perm.c72 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … in xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm() local
78 …const __m256 vs = _mm256_mul_ps(vl, _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1… in xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm()
Dexpm1minus-avx-rr2-lut16-p3.c69 const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() local
102 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
/external/XNNPACK/src/f32-velu/gen/
Dvelu-avx-rr2-lut4-p4-perm-x8.c57 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8() local
60 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()
93 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8() local
96 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()
Dvelu-avx-rr2-lut4-p4-perm-x16.c119 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16() local
122 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16()
155 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16() local
158 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16()
Dvelu-avx-rr2-lut16-p3-x8.c81 const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local
88 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
147 const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local
154 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
Dvelu-avx-rr2-lut4-p4-perm-x24.c141 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24() local
144 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24()
177 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24() local
180 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24()
Dvelu-avx-rr2-lut4-p4-perm-x32.c163 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32() local
166 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
199 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32() local
202 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
Dvelu-avx-rr2-lut4-p4-perm-x40.c185 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40() local
188 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
221 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40() local
224 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
Dvelu-avx-rr2-lut4-p4-perm-x48.c207 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48() local
210 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
243 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48() local
246 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
Dvelu-avx-rr2-lut16-p3-x16.c196 const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local
203 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
262 const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local
269 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
Dvelu-avx-rr2-lut16-p3-x24.c245 const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local
252 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
311 const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local
318 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
Dvelu-avx-rr2-lut16-p3-x32.c294 const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local
301 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
360 const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32() local
367 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
Dvelu-avx-rr2-lut16-p3-x40.c343 const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local
350 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
409 const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local
416 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
Dvelu-avx-rr2-lut16-p3-x48.c392 const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local
399 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
458 const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48() local
465 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
/external/XNNPACK/src/f32-velu/
Davx-rr2-lut4-p4-perm.c.in114 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … variable
117 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);
150 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … variable
153 ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);
Davx-rr2-lut16-p3.c.in163 const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19); variable
170 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));
229 const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19); variable
236 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));