/external/XNNPACK/src/math/
exp-avx-rr2-p5.c
   63  __m128i ven_hi = _mm_max_epi16(veo_hi, vmin_exponent);  in xnn_math_f32_exp__avx_rr2_p5()  local
   65  ven_hi = _mm_min_epi16(ven_hi, vmax_exponent);  in xnn_math_f32_exp__avx_rr2_p5()
   67  veo_hi = _mm_sub_epi32(veo_hi, ven_hi);  in xnn_math_f32_exp__avx_rr2_p5()
   69  const __m128 vsn_hi = _mm_castsi128_ps(_mm_add_epi32(ven_hi, vdefault_exponent));  in xnn_math_f32_exp__avx_rr2_p5()
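The four exp-avx-rr2-p5.c hits above reconstruct the scale 2^n in two parts: n is clamped to a representable exponent (ven), the remainder stays behind in veo, and each part is turned into a power-of-two float by adding vdefault_exponent to its exponent bits. A minimal scalar sketch of that scheme, assuming a clamp range of [-126, 127] (the kernel's real vmin_exponent/vmax_exponent constants are defined in the file, not shown in this listing):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* 2^e built directly from IEEE-754 exponent bits; valid for -126 <= e <= 127. */
    static float exp2_int(int e) {
      uint32_t bits = (uint32_t)(e + 127) << 23;
      float f;
      memcpy(&f, &bits, sizeof f);
      return f;
    }

    /* f * 2^n evaluated as (f * 2^en) * 2^(n - en): the scalar analogue of the
     * clamp (_mm_max_epi16/_mm_min_epi16), subtract (_mm_sub_epi32), and
     * bias-add (_mm_add_epi32 with vdefault_exponent) lines quoted above. */
    static float scale_by_2n(float f, int n) {
      const int en = n < -126 ? -126 : (n > 127 ? 127 : n);
      return (f * exp2_int(en)) * exp2_int(n - en);
    }

    int main(void) {
      /* 2^128 alone overflows to +inf, yet 0.75 * 2^128 is a finite float. */
      printf("%g\n", scale_by_2n(0.75f, 128));  /* ~2.55e38 */
      return 0;
    }

Splitting the scale this way keeps f * 2^n finite in the ranges where a single 2^n factor would overflow to infinity or flush to zero.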
expm1minus-avx-rr2-lut4-p4-perm.c
   72  …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, …  in xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm()  local
   78  …const __m256 vs = _mm256_mul_ps(vl, _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1…  in xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm()
expm1minus-avx-rr2-lut16-p3.c
   69  const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);  in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()  local
  102  const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));  in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
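Every ven_hi in this listing exists for the same reason: these are AVX1 kernels, and AVX1 offers no 256-bit integer shift. Each kernel therefore extracts the two 128-bit halves of the __m256, shifts them with SSE2 integer intrinsics, and either reassembles them (the lut4-p4-perm files) or consumes them half by half (the lut16-p3 files). A self-contained sketch of the split/shift/reassemble pattern, using the lut16-p3 shift count of 19 (the lut4-p4-perm shift count is truncated out of this listing):

    #include <immintrin.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Shift the bit pattern of all 8 lanes of vn left by 19 on AVX1:
     * split into __m128 halves, shift in the integer domain, reassemble. */
    static __m256 shift_bits_left_19(__m256 vn) {
      const __m128i ven_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)), 19);
      const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);
      return _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_castsi128_ps(ven_lo)),
                                  _mm_castsi128_ps(ven_hi), 1);
    }

    int main(void) {
      const float in[8] = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f};
      float out[8];
      _mm256_storeu_ps(out, shift_bits_left_19(_mm256_loadu_ps(in)));
      for (int i = 0; i < 8; i++) {
        uint32_t bits;
        memcpy(&bits, &out[i], sizeof bits);
        printf("0x%08x\n", bits);  /* input bit patterns, shifted left by 19 */
      }
      return 0;
    }

Built with -mavx, this mirrors the sequence behind each pair of ven_hi lines in the gen/ kernels below.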
/external/XNNPACK/src/f32-velu/gen/
velu-avx-rr2-lut4-p4-perm-x8.c
   57  …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, …  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()  local
   60  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()
   93  …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, …  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()  local
   96  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8()
velu-avx-rr2-lut4-p4-perm-x16.c
  119  …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, …  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16()  local
  122  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16()
  155  …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, …  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16()  local
  158  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16()
velu-avx-rr2-lut16-p3-x8.c
   81  const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()  local
   88  const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
  147  const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()  local
  154  const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
velu-avx-rr2-lut4-p4-perm-x24.c
  141  …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, …  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24()  local
  144  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24()
  177  …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, …  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24()  local
  180  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24()
velu-avx-rr2-lut4-p4-perm-x32.c
  163  …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, …  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()  local
  166  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
  199  …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, …  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()  local
  202  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
velu-avx-rr2-lut4-p4-perm-x40.c
  185  …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, …  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()  local
  188  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
  221  …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, …  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()  local
  224  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
velu-avx-rr2-lut4-p4-perm-x48.c
  207  …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, …  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()  local
  210  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
  243  …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, …  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()  local
  246  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);  in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
velu-avx-rr2-lut16-p3-x16.c
  196  const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()  local
  203  const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
  262  const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()  local
  269  const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
velu-avx-rr2-lut16-p3-x24.c
  245  const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()  local
  252  const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
  311  const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()  local
  318  const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
velu-avx-rr2-lut16-p3-x32.c
  294  const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()  local
  301  const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
  360  const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()  local
  367  const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
velu-avx-rr2-lut16-p3-x40.c
  343  const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()  local
  350  const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
  409  const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()  local
  416  const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
velu-avx-rr2-lut16-p3-x48.c
  392  const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()  local
  399  const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
  458  const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()  local
  465  const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));  in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
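In the lut16-p3 kernels above, ven_hi feeds an integer add rather than an insertf128: vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)). Adding n << 23 to a float's bit pattern multiplies the float by 2^n, so the kernels form s = lut[i] * 2^n with a single paddd instead of a float multiply. A scalar sketch of that identity (the function name is illustrative, not XNNPACK's):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Multiply a float by 2^n by adding n to its biased exponent field,
     * the scalar analogue of _mm_add_epi32(vl_hi, ven_hi). */
    static float times_pow2(float l, int n) {
      uint32_t bits;
      memcpy(&bits, &l, sizeof bits);
      bits += (uint32_t)n << 23;
      float s;
      memcpy(&s, &bits, sizeof s);
      return s;
    }

    int main(void) {
      printf("%f\n", times_pow2(1.5f, 3));  /* 1.5 * 2^3 = 12.000000 */
      return 0;
    }

The identity is exact as long as the result's exponent stays in the normal range, which these kernels arrange elsewhere (saturation of large negative inputs before n is formed).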
/external/XNNPACK/src/f32-velu/
avx-rr2-lut4-p4-perm.c.in
  114  …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, …  variable
  117  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);
  150  …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, …  variable
  153  ven = _mm256_insertf128_ps(_mm256_castps128_ps256(ven_lo), ven_hi, 1);
avx-rr2-lut16-p3.c.in
  163  const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);  variable
  170  const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));
  229  const __m128i ven_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)), 19);  variable
  236  const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi));