Home
last modified time | relevance | path

Searched refs:_mm_slli_epi32 (Results 1 – 25 of 198) sorted by relevance

12345678

/external/oboe/samples/RhythmGame/third_party/glm/simd/
Dinteger.h38 Reg2 = _mm_slli_epi32(Reg1, 4); in glm_i128_interleave()
44 Reg2 = _mm_slli_epi32(Reg1, 2); in glm_i128_interleave()
50 Reg2 = _mm_slli_epi32(Reg1, 1); in glm_i128_interleave()
55 Reg2 = _mm_slli_epi32(Reg1, 1); in glm_i128_interleave()
91 Reg2 = _mm_slli_epi32(Reg1, 4); in glm_i128_interleave2()
97 Reg2 = _mm_slli_epi32(Reg1, 2); in glm_i128_interleave2()
103 Reg2 = _mm_slli_epi32(Reg1, 1); in glm_i128_interleave2()
108 Reg2 = _mm_slli_epi32(Reg1, 1); in glm_i128_interleave2()
/external/XNNPACK/src/f32-sigmoid/gen/
Davx-rr2-p5-div-x80.c76 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
77 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
79 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
80 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
82 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
83 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
85 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
86 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
88 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
89 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80()
[all …]
Davx-rr2-p5-div-x72.c73 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
74 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
76 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
77 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
79 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
80 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
82 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
83 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
85 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
86 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72()
[all …]
Davx-rr2-p5-div-x64.c70 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
71 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
73 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
74 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
76 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
77 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
79 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
80 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
82 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
83 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64()
[all …]
Davx-rr2-p5-div-x56.c67 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
68 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
70 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
71 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
73 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
74 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
76 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
77 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
79 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
80 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x56()
[all …]
Davx-rr2-p5-div-x48.c64 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
65 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
67 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
68 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
70 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
71 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
73 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
74 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
76 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
77 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
[all …]
Davx-rr2-p5-div-x40.c61 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x40()
62 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x40()
64 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x40()
65 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x40()
67 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x40()
68 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x40()
70 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x40()
71 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x40()
73 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x40()
74 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x40()
[all …]
Davx-rr2-p5-div-x32.c58 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x32()
59 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x32()
61 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x32()
62 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x32()
64 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x32()
65 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x32()
67 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x32()
68 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x32()
150 …const __m128 vs_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)),… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x32()
151 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x32()
[all …]
Davx-rr2-p5-nr2-x80.c77 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80()
78 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80()
80 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80()
81 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80()
83 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80()
84 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80()
86 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80()
87 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80()
89 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80()
90 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80()
[all …]
Davx-rr2-p5-nr2-x72.c74 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72()
75 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72()
77 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72()
78 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72()
80 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72()
81 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72()
83 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72()
84 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72()
86 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72()
87 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x72()
[all …]
Davx-rr2-p5-nr2-x64.c71 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64()
72 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64()
74 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64()
75 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64()
77 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64()
78 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64()
80 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64()
81 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64()
83 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64()
84 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x64()
[all …]
Davx-rr2-p5-nr2-x56.c68 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
69 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
71 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
72 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
74 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
75 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
77 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
78 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
80 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
81 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x56()
[all …]
Davx-rr2-p5-nr2-x40.c62 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
63 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
65 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
66 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
68 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
69 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
71 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
72 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
74 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
75 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
[all …]
Davx-rr2-p5-nr2-x48.c65 …const __m128 vs_lo0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
66 …const __m128 vs_hi0 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
68 …const __m128 vs_lo1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
69 …const __m128 vs_hi1 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
71 …const __m128 vs_lo2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
72 …const __m128 vs_hi2 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
74 …const __m128 vs_lo3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
75 …const __m128 vs_hi3 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
77 …const __m128 vs_lo4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
78 …const __m128 vs_hi4 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x48()
[all …]
/external/XNNPACK/src/f32-velu/gen/
Dvelu-avx-rr2-lut16-p3-x40.c67 const __m128i vidx0_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx0)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
68 const __m128i vidx0_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx0, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
96 const __m128i vidx1_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
97 const __m128i vidx1_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx1, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
125 const __m128i vidx2_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx2)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
126 const __m128i vidx2_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx2, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
154 const __m128i vidx3_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx3)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
155 const __m128i vidx3_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx3, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
183 const __m128i vidx4_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx4)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
184 const __m128i vidx4_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx4, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
[all …]
Dvelu-avx-rr2-lut4-p4-perm-x48.c71 …const __m128 ven0_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
74 …const __m128 ven1_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
77 …const __m128 ven2_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
80 …const __m128 ven3_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
83 …const __m128 ven4_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
86 …const __m128 ven5_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
89 …const __m128 ven0_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven0… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
91 …const __m128 ven1_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven1… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
93 …const __m128 ven2_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven2… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
95 …const __m128 ven3_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven3… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
[all …]
Dvelu-avx-rr2-p6-x48.c67 …const __m128 vs0_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
68 …const __m128 vs0_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
70 …const __m128 vs1_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
71 …const __m128 vs1_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
73 …const __m128 vs2_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
74 …const __m128 vs2_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
76 …const __m128 vs3_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
77 …const __m128 vs3_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
79 …const __m128 vs4_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
80 …const __m128 vs4_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_velu_ukernel__avx_rr2_p6_x48()
[all …]
Dvelu-avx-rr2-lut16-p3-x32.c64 const __m128i vidx0_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx0)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
65 const __m128i vidx0_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx0, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
93 const __m128i vidx1_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
94 const __m128i vidx1_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx1, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
122 const __m128i vidx2_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx2)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
123 const __m128i vidx2_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx2, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
151 const __m128i vidx3_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx3)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
152 const __m128i vidx3_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx3, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
179 const __m128i ven0_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
180 const __m128i ven0_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, 1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
[all …]
Dvelu-avx-rr2-lut16-p3-x48.c70 const __m128i vidx0_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx0)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
71 const __m128i vidx0_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx0, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
99 const __m128i vidx1_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
100 const __m128i vidx1_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx1, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
128 const __m128i vidx2_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx2)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
129 const __m128i vidx2_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx2, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
157 const __m128i vidx3_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx3)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
158 const __m128i vidx3_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx3, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
186 const __m128i vidx4_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx4)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
187 const __m128i vidx4_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx4, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
[all …]
Dvelu-avx-rr2-p6-x40.c64 …const __m128 vs0_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
65 …const __m128 vs0_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
67 …const __m128 vs1_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
68 …const __m128 vs1_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
70 …const __m128 vs2_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
71 …const __m128 vs2_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
73 …const __m128 vs3_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
74 …const __m128 vs3_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
76 …const __m128 vs4_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn4)… in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
77 …const __m128 vs4_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn4, … in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
[all …]
Dvelu-avx-rr2-lut4-p4-perm-x40.c68 …const __m128 ven0_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
71 …const __m128 ven1_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
74 …const __m128 ven2_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
77 …const __m128 ven3_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
80 …const __m128 ven4_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
83 …const __m128 ven0_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven0… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
85 …const __m128 ven1_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven1… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
87 …const __m128 ven2_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven2… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
89 …const __m128 ven3_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven3… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
91 …const __m128 ven4_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven4… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
[all …]
Dvelu-avx-rr2-lut16-p3-x24.c61 const __m128i vidx0_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx0)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
62 const __m128i vidx0_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx0, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
90 const __m128i vidx1_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
91 const __m128i vidx1_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx1, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
119 const __m128i vidx2_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx2)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
120 const __m128i vidx2_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx2, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
147 const __m128i ven0_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
148 const __m128i ven0_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, 1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
152 const __m128i ven1_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
153 const __m128i ven1_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, 1)), 19); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
[all …]
Dvelu-avx-rr2-lut4-p4-perm-x32.c65 …const __m128 ven0_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
68 …const __m128 ven1_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
71 …const __m128 ven2_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
74 …const __m128 ven3_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
77 …const __m128 ven0_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven0… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
79 …const __m128 ven1_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven1… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
81 …const __m128 ven2_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven2… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
83 …const __m128 ven3_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven3… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
161 …const __m128 ven_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven)… in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
163 …const __m128 ven_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven, … in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
[all …]
Dvelu-avx-rr2-p6-x32.c61 …const __m128 vs0_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn0)… in xnn_f32_velu_ukernel__avx_rr2_p6_x32()
62 …const __m128 vs0_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn0, … in xnn_f32_velu_ukernel__avx_rr2_p6_x32()
64 …const __m128 vs1_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x32()
65 …const __m128 vs1_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn1, … in xnn_f32_velu_ukernel__avx_rr2_p6_x32()
67 …const __m128 vs2_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn2)… in xnn_f32_velu_ukernel__avx_rr2_p6_x32()
68 …const __m128 vs2_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn2, … in xnn_f32_velu_ukernel__avx_rr2_p6_x32()
70 …const __m128 vs3_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn3)… in xnn_f32_velu_ukernel__avx_rr2_p6_x32()
71 …const __m128 vs3_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn3, … in xnn_f32_velu_ukernel__avx_rr2_p6_x32()
154 …const __m128 vs_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vn)),… in xnn_f32_velu_ukernel__avx_rr2_p6_x32()
155 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x32()
[all …]
/external/libhevc/common/x86/
Dihevc_itrans_recon_ssse3_intr.c191 m_temp_reg_20 = _mm_slli_epi32(m_temp_reg_1, 6); in ihevc_itrans_recon_4x4_ttype1_ssse3()
192 m_temp_reg_21 = _mm_slli_epi32(m_temp_reg_1, 3); in ihevc_itrans_recon_4x4_ttype1_ssse3()
193 m_temp_reg_22 = _mm_slli_epi32(m_temp_reg_1, 1); in ihevc_itrans_recon_4x4_ttype1_ssse3()
222 m_temp_reg_20 = _mm_slli_epi32(m_temp_reg_10, 5); in ihevc_itrans_recon_4x4_ttype1_ssse3()
223 m_temp_reg_21 = _mm_slli_epi32(m_temp_reg_10, 1); in ihevc_itrans_recon_4x4_ttype1_ssse3()
228 m_temp_reg_20 = _mm_slli_epi32(m_temp_reg_11, 6); in ihevc_itrans_recon_4x4_ttype1_ssse3()
229 m_temp_reg_21 = _mm_slli_epi32(m_temp_reg_11, 3); in ihevc_itrans_recon_4x4_ttype1_ssse3()
236 m_temp_reg_20 = _mm_slli_epi32(m_temp_reg_11, 5); in ihevc_itrans_recon_4x4_ttype1_ssse3()
237 m_temp_reg_21 = _mm_slli_epi32(m_temp_reg_11, 1); in ihevc_itrans_recon_4x4_ttype1_ssse3()
242 m_temp_reg_20 = _mm_slli_epi32(m_temp_reg_12, 6); in ihevc_itrans_recon_4x4_ttype1_ssse3()
[all …]

12345678