Home
last modified time | relevance | path

Searched refs:_mm_cvtsi128_si64 (Results 1 – 25 of 62) sorted by relevance

123

/external/rust/crates/aho-corasick/src/packed/
Dvector.rs77 _mm_cvtsi128_si64(a) as u64,
78 _mm_cvtsi128_si64(_mm_srli_si128(a, 8)) as u64,
92 _mm_cvtsi128_si64(lo) as u64,
93 _mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64,
94 _mm_cvtsi128_si64(hi) as u64,
95 _mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64,
110 _mm_cvtsi128_si64(lo) as u64,
111 _mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64,
112 _mm_cvtsi128_si64(hi) as u64,
113 _mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64,
/external/XNNPACK/src/f32-sigmoid/gen/
Dsse2-lut64-p2-div-x20.c71 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
72 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
80 const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
81 const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
89 const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
90 const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
98 const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
99 const uint64_t vidxEF = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxCDEF, vidxCDEF)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
107 const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
108 const uint64_t vidxIJ = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxGHIJ, vidxGHIJ)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20()
[all …]
Dsse2-lut64-p2-div-x24.c76 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
77 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
85 const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
86 const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
94 const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
95 const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
103 const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
104 const uint64_t vidxEF = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxCDEF, vidxCDEF)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
112 const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
113 const uint64_t vidxIJ = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxGHIJ, vidxGHIJ)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
[all …]
Dsse2-lut64-p2-div-x12.c61 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
62 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
70 const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
71 const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
79 const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
80 const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
184 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
185 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
231 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
232 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12()
Dsse2-lut64-p2-div-x16.c66 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
67 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
75 const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
76 const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
84 const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
85 const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
93 const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
94 const uint64_t vidxEF = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxCDEF, vidxCDEF)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
221 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
222 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16()
[all …]
Dsse2-lut64-p2-div-x8.c56 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
57 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
65 const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
66 const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
147 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
148 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
194 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
195 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8()
Dsse2-lut64-p2-div-x4.c49 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
50 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
96 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
97 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
Dsse41-lut64-p2-div-x24.c76 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
83 const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
90 const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
97 const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
104 const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
111 const uint64_t vidxKL = (uint64_t) _mm_cvtsi128_si64(vidxKLMN); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
264 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
306 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
Dsse41-lut64-p2-div-x16.c66 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
73 const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
80 const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
87 const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
200 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
242 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
Dsse41-lut64-p2-div-x20.c71 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
78 const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
85 const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
92 const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
99 const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
232 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
274 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
/external/XNNPACK/src/f32-velu/gen/
Dvelu-sse2-rr2-lut16-p3-x12.c67 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
68 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
76 const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
77 const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
85 const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
86 const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
195 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
196 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
243 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
244 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
Dvelu-sse2-rr2-lut16-p3-x16.c72 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
73 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
81 const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
82 const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
90 const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
91 const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
99 const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
100 const uint64_t vidxEF = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxCDEF, vidxCDEF)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
234 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
235 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
[all …]
Dvelu-sse2-rr2-lut16-p3-x20.c77 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
78 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
86 const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
87 const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
95 const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
96 const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
104 const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
105 const uint64_t vidxEF = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxCDEF, vidxCDEF)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
113 const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
114 const uint64_t vidxIJ = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxGHIJ, vidxGHIJ)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
[all …]
Dvelu-sse2-rr2-lut16-p3-x24.c82 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
83 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
91 const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
92 const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
100 const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
101 const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
109 const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
110 const uint64_t vidxEF = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxCDEF, vidxCDEF)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
118 const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
119 const uint64_t vidxIJ = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxGHIJ, vidxGHIJ)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
[all …]
Dvelu-sse2-rr2-lut16-p3-x8.c62 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
63 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
71 const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
72 const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
156 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
157 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
204 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
205 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
Dvelu-avx-rr2-lut16-p3-x40.c70 const uint64_t vidx0_ll = (uint64_t) _mm_cvtsi128_si64(vidx0_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
72 const uint64_t vidx0_hl = (uint64_t) _mm_cvtsi128_si64(vidx0_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
99 const uint64_t vidx1_ll = (uint64_t) _mm_cvtsi128_si64(vidx1_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
101 const uint64_t vidx1_hl = (uint64_t) _mm_cvtsi128_si64(vidx1_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
128 const uint64_t vidx2_ll = (uint64_t) _mm_cvtsi128_si64(vidx2_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
130 const uint64_t vidx2_hl = (uint64_t) _mm_cvtsi128_si64(vidx2_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
157 const uint64_t vidx3_ll = (uint64_t) _mm_cvtsi128_si64(vidx3_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
159 const uint64_t vidx3_hl = (uint64_t) _mm_cvtsi128_si64(vidx3_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
186 const uint64_t vidx4_ll = (uint64_t) _mm_cvtsi128_si64(vidx4_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
188 const uint64_t vidx4_hl = (uint64_t) _mm_cvtsi128_si64(vidx4_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
[all …]
Dvelu-avx-rr2-lut16-p3-x32.c67 const uint64_t vidx0_ll = (uint64_t) _mm_cvtsi128_si64(vidx0_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
69 const uint64_t vidx0_hl = (uint64_t) _mm_cvtsi128_si64(vidx0_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
96 const uint64_t vidx1_ll = (uint64_t) _mm_cvtsi128_si64(vidx1_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
98 const uint64_t vidx1_hl = (uint64_t) _mm_cvtsi128_si64(vidx1_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
125 const uint64_t vidx2_ll = (uint64_t) _mm_cvtsi128_si64(vidx2_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
127 const uint64_t vidx2_hl = (uint64_t) _mm_cvtsi128_si64(vidx2_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
154 const uint64_t vidx3_ll = (uint64_t) _mm_cvtsi128_si64(vidx3_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
156 const uint64_t vidx3_hl = (uint64_t) _mm_cvtsi128_si64(vidx3_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
271 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
273 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
[all …]
Dvelu-avx-rr2-lut16-p3-x48.c73 const uint64_t vidx0_ll = (uint64_t) _mm_cvtsi128_si64(vidx0_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
75 const uint64_t vidx0_hl = (uint64_t) _mm_cvtsi128_si64(vidx0_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
102 const uint64_t vidx1_ll = (uint64_t) _mm_cvtsi128_si64(vidx1_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
104 const uint64_t vidx1_hl = (uint64_t) _mm_cvtsi128_si64(vidx1_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
131 const uint64_t vidx2_ll = (uint64_t) _mm_cvtsi128_si64(vidx2_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
133 const uint64_t vidx2_hl = (uint64_t) _mm_cvtsi128_si64(vidx2_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
160 const uint64_t vidx3_ll = (uint64_t) _mm_cvtsi128_si64(vidx3_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
162 const uint64_t vidx3_hl = (uint64_t) _mm_cvtsi128_si64(vidx3_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
189 const uint64_t vidx4_ll = (uint64_t) _mm_cvtsi128_si64(vidx4_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
191 const uint64_t vidx4_hl = (uint64_t) _mm_cvtsi128_si64(vidx4_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
[all …]
Dvelu-avx-rr2-lut16-p3-x24.c64 const uint64_t vidx0_ll = (uint64_t) _mm_cvtsi128_si64(vidx0_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
66 const uint64_t vidx0_hl = (uint64_t) _mm_cvtsi128_si64(vidx0_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
93 const uint64_t vidx1_ll = (uint64_t) _mm_cvtsi128_si64(vidx1_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
95 const uint64_t vidx1_hl = (uint64_t) _mm_cvtsi128_si64(vidx1_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
122 const uint64_t vidx2_ll = (uint64_t) _mm_cvtsi128_si64(vidx2_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
124 const uint64_t vidx2_hl = (uint64_t) _mm_cvtsi128_si64(vidx2_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
222 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
224 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
288 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
290 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
Dvelu-sse2-rr2-lut16-p3-x4.c56 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
57 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
104 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
105 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
Dvelu-avx-rr2-lut16-p3-x16.c61 const uint64_t vidx0_ll = (uint64_t) _mm_cvtsi128_si64(vidx0_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
63 const uint64_t vidx0_hl = (uint64_t) _mm_cvtsi128_si64(vidx0_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
90 const uint64_t vidx1_ll = (uint64_t) _mm_cvtsi128_si64(vidx1_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
92 const uint64_t vidx1_hl = (uint64_t) _mm_cvtsi128_si64(vidx1_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
173 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
175 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
239 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
241 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
Dvelu-avx-rr2-lut16-p3-x8.c58 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
60 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
124 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
126 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
Dvelu-sse41-rr2-lut16-p3-x20.c77 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
84 const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
91 const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
98 const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
105 const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
248 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
291 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
Dvelu-sse41-rr2-lut16-p3-x24.c82 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
89 const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
96 const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
103 const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
110 const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
117 const uint64_t vidxKL = (uint64_t) _mm_cvtsi128_si64(vidxKLMN); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
282 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
325 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
/external/XNNPACK/src/math/
Dexpm1minus-sse2-rr2-lut16-p3.c73 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_math_f32_expm1minus__sse2_rr2_lut16_p3()
74 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_math_f32_expm1minus__sse2_rr2_lut16_p3()

123