/external/rust/crates/aho-corasick/src/packed/ |
D | vector.rs |
     77  _mm_cvtsi128_si64(a) as u64,
     78  _mm_cvtsi128_si64(_mm_srli_si128(a, 8)) as u64,
     92  _mm_cvtsi128_si64(lo) as u64,
     93  _mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64,
     94  _mm_cvtsi128_si64(hi) as u64,
     95  _mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64,
    110  _mm_cvtsi128_si64(lo) as u64,
    111  _mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64,
    112  _mm_cvtsi128_si64(hi) as u64,
    113  _mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64,
|
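Every vector.rs hit above is the same idiom: _mm_cvtsi128_si64 reads lane 0 of a __m128i, and _mm_srli_si128(v, 8) byte-shifts the register right so the upper 64-bit lane lands in lane 0 for a second read. A minimal C sketch of that idiom (the helper name and test values are illustrative, not from the crate):

    #include <emmintrin.h>  /* SSE2; _mm_cvtsi128_si64 requires x86-64 */
    #include <stdint.h>
    #include <stdio.h>

    /* Extract both 64-bit lanes of a __m128i: lane 0 directly, lane 1 by
       shifting the whole register right by 8 bytes first. */
    static void extract_lanes(__m128i a, uint64_t out[2]) {
      out[0] = (uint64_t) _mm_cvtsi128_si64(a);
      out[1] = (uint64_t) _mm_cvtsi128_si64(_mm_srli_si128(a, 8));
    }

    int main(void) {
      /* _mm_set_epi64x takes (high, low). */
      const __m128i a = _mm_set_epi64x(0x1122334455667788LL, 0x0A0B0C0D0E0F1011LL);
      uint64_t lanes[2];
      extract_lanes(a, lanes);
      printf("%016llx %016llx\n",
             (unsigned long long) lanes[0], (unsigned long long) lanes[1]);
      /* prints: 0a0b0c0d0e0f1011 1122334455667788 */
      return 0;
    }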
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | sse2-lut64-p2-div-x20.c | all hits in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20():
     71  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123);
     72  const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123));
     80  const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567);
     81  const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567));
     89  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     90  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
     98  const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF);
     99  const uint64_t vidxEF = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxCDEF, vidxCDEF));
    107  const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ);
    108  const uint64_t vidxIJ = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxGHIJ, vidxGHIJ));
    [all …]
|
D | sse2-lut64-p2-div-x24.c | all hits in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24():
     76  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123);
     77  const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123));
     85  const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567);
     86  const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567));
     94  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     95  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
    103  const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF);
    104  const uint64_t vidxEF = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxCDEF, vidxCDEF));
    112  const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ);
    113  const uint64_t vidxIJ = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxGHIJ, vidxGHIJ));
    [all …]
|
D | sse2-lut64-p2-div-x12.c | all hits in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12():
     61  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123);
     62  const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123));
     70  const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567);
     71  const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567));
     79  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     80  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
    184  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    185  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
    231  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    232  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
|
D | sse2-lut64-p2-div-x16.c | all hits in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16():
     66  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123);
     67  const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123));
     75  const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567);
     76  const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567));
     84  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     85  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
     93  const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF);
     94  const uint64_t vidxEF = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxCDEF, vidxCDEF));
    221  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    222  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
    [all …]
|
D | sse2-lut64-p2-div-x8.c | all hits in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8():
     56  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123);
     57  const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123));
     65  const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567);
     66  const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567));
    147  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    148  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
    194  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    195  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
|
D | sse2-lut64-p2-div-x4.c | all hits in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4():
     49  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
     50  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
     96  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
     97  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
|
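The SSE2 sigmoid hits above all come in pairs: _mm_cvtsi128_si64 reads the low 64 bits of the index vector (two packed 32-bit indices), and _mm_unpackhi_epi64(v, v) broadcasts the high 64 bits into the low lane so a second _mm_cvtsi128_si64 can read the other pair. A compressed C sketch of the surrounding lookup, assuming a hypothetical in-range 64-entry table and plain element indices (the real kernels index with byte offsets):

    #include <emmintrin.h>  /* SSE2 */
    #include <stdint.h>

    extern const float table[64];  /* placeholder for the kernel's LUT */

    static __m128 gather4(__m128i vidx) {
      /* Two packed 32-bit indices per 64-bit read. */
      const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
      const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
      /* Four scalar loads, then recombine into one vector. */
      const __m128 vl0 = _mm_load_ss(&table[(uint32_t) vidx_lo]);
      const __m128 vl1 = _mm_load_ss(&table[(uint32_t) (vidx_lo >> 32)]);
      const __m128 vl2 = _mm_load_ss(&table[(uint32_t) vidx_hi]);
      const __m128 vl3 = _mm_load_ss(&table[(uint32_t) (vidx_hi >> 32)]);
      return _mm_movelh_ps(_mm_unpacklo_ps(vl0, vl1), _mm_unpacklo_ps(vl2, vl3));
    }

SSE2 has no gather instruction, so this vector-to-scalar-and-back round trip is the standard workaround, which is why the same two lines recur in every kernel.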
D | sse41-lut64-p2-div-x24.c | all hits in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24():
     76  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123);
     83  const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567);
     90  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     97  const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF);
    104  const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ);
    111  const uint64_t vidxKL = (uint64_t) _mm_cvtsi128_si64(vidxKLMN);
    264  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    306  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
|
D | sse41-lut64-p2-div-x16.c | all hits in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16():
     66  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123);
     73  const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567);
     80  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     87  const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF);
    200  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    242  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
|
D | sse41-lut64-p2-div-x20.c | all hits in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20():
     71  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123);
     78  const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567);
     85  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     92  const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF);
     99  const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ);
    232  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    274  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
|
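The SSE4.1 variants show only one _mm_cvtsi128_si64 hit per index vector, presumably because the upper 64 bits can be read directly with the SSE4.1 _mm_extract_epi64 instead of going through an _mm_unpackhi_epi64 shuffle first. A short C sketch under that assumption (helper name is illustrative):

    #include <smmintrin.h>  /* SSE4.1 */
    #include <stdint.h>

    static void split_pairs(__m128i vidx, uint64_t* lo, uint64_t* hi) {
      *lo = (uint64_t) _mm_cvtsi128_si64(vidx);     /* packed indices 0..1 */
      *hi = (uint64_t) _mm_extract_epi64(vidx, 1);  /* packed indices 2..3, one instruction */
    }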
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-sse2-rr2-lut16-p3-x12.c | all hits in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12():
     67  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123);
     68  const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123));
     76  const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567);
     77  const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567));
     85  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     86  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
    195  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    196  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
    243  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    244  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
|
D | velu-sse2-rr2-lut16-p3-x16.c | all hits in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16():
     72  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123);
     73  const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123));
     81  const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567);
     82  const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567));
     90  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     91  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
     99  const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF);
    100  const uint64_t vidxEF = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxCDEF, vidxCDEF));
    234  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    235  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
    [all …]
|
D | velu-sse2-rr2-lut16-p3-x20.c | all hits in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20():
     77  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123);
     78  const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123));
     86  const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567);
     87  const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567));
     95  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     96  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
    104  const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF);
    105  const uint64_t vidxEF = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxCDEF, vidxCDEF));
    113  const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ);
    114  const uint64_t vidxIJ = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxGHIJ, vidxGHIJ));
    [all …]
|
D | velu-sse2-rr2-lut16-p3-x24.c | all hits in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24():
     82  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123);
     83  const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123));
     91  const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567);
     92  const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567));
    100  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
    101  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
    109  const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF);
    110  const uint64_t vidxEF = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxCDEF, vidxCDEF));
    118  const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ);
    119  const uint64_t vidxIJ = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidxGHIJ, vidxGHIJ));
    [all …]
|
D | velu-sse2-rr2-lut16-p3-x8.c | all hits in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8():
     62  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123);
     63  const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123));
     71  const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567);
     72  const uint64_t vidx67 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx4567, vidx4567));
    156  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    157  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
    204  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    205  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
|
D | velu-avx-rr2-lut16-p3-x40.c | all hits in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40():
     70  const uint64_t vidx0_ll = (uint64_t) _mm_cvtsi128_si64(vidx0_lo);
     72  const uint64_t vidx0_hl = (uint64_t) _mm_cvtsi128_si64(vidx0_hi);
     99  const uint64_t vidx1_ll = (uint64_t) _mm_cvtsi128_si64(vidx1_lo);
    101  const uint64_t vidx1_hl = (uint64_t) _mm_cvtsi128_si64(vidx1_hi);
    128  const uint64_t vidx2_ll = (uint64_t) _mm_cvtsi128_si64(vidx2_lo);
    130  const uint64_t vidx2_hl = (uint64_t) _mm_cvtsi128_si64(vidx2_hi);
    157  const uint64_t vidx3_ll = (uint64_t) _mm_cvtsi128_si64(vidx3_lo);
    159  const uint64_t vidx3_hl = (uint64_t) _mm_cvtsi128_si64(vidx3_hi);
    186  const uint64_t vidx4_ll = (uint64_t) _mm_cvtsi128_si64(vidx4_lo);
    188  const uint64_t vidx4_hl = (uint64_t) _mm_cvtsi128_si64(vidx4_hi);
    [all …]
|
D | velu-avx-rr2-lut16-p3-x32.c | all hits in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32():
     67  const uint64_t vidx0_ll = (uint64_t) _mm_cvtsi128_si64(vidx0_lo);
     69  const uint64_t vidx0_hl = (uint64_t) _mm_cvtsi128_si64(vidx0_hi);
     96  const uint64_t vidx1_ll = (uint64_t) _mm_cvtsi128_si64(vidx1_lo);
     98  const uint64_t vidx1_hl = (uint64_t) _mm_cvtsi128_si64(vidx1_hi);
    125  const uint64_t vidx2_ll = (uint64_t) _mm_cvtsi128_si64(vidx2_lo);
    127  const uint64_t vidx2_hl = (uint64_t) _mm_cvtsi128_si64(vidx2_hi);
    154  const uint64_t vidx3_ll = (uint64_t) _mm_cvtsi128_si64(vidx3_lo);
    156  const uint64_t vidx3_hl = (uint64_t) _mm_cvtsi128_si64(vidx3_hi);
    271  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);
    273  const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi);
    [all …]
|
D | velu-avx-rr2-lut16-p3-x48.c | all hits in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48():
     73  const uint64_t vidx0_ll = (uint64_t) _mm_cvtsi128_si64(vidx0_lo);
     75  const uint64_t vidx0_hl = (uint64_t) _mm_cvtsi128_si64(vidx0_hi);
    102  const uint64_t vidx1_ll = (uint64_t) _mm_cvtsi128_si64(vidx1_lo);
    104  const uint64_t vidx1_hl = (uint64_t) _mm_cvtsi128_si64(vidx1_hi);
    131  const uint64_t vidx2_ll = (uint64_t) _mm_cvtsi128_si64(vidx2_lo);
    133  const uint64_t vidx2_hl = (uint64_t) _mm_cvtsi128_si64(vidx2_hi);
    160  const uint64_t vidx3_ll = (uint64_t) _mm_cvtsi128_si64(vidx3_lo);
    162  const uint64_t vidx3_hl = (uint64_t) _mm_cvtsi128_si64(vidx3_hi);
    189  const uint64_t vidx4_ll = (uint64_t) _mm_cvtsi128_si64(vidx4_lo);
    191  const uint64_t vidx4_hl = (uint64_t) _mm_cvtsi128_si64(vidx4_hi);
    [all …]
|
D | velu-avx-rr2-lut16-p3-x24.c | all hits in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24():
     64  const uint64_t vidx0_ll = (uint64_t) _mm_cvtsi128_si64(vidx0_lo);
     66  const uint64_t vidx0_hl = (uint64_t) _mm_cvtsi128_si64(vidx0_hi);
     93  const uint64_t vidx1_ll = (uint64_t) _mm_cvtsi128_si64(vidx1_lo);
     95  const uint64_t vidx1_hl = (uint64_t) _mm_cvtsi128_si64(vidx1_hi);
    122  const uint64_t vidx2_ll = (uint64_t) _mm_cvtsi128_si64(vidx2_lo);
    124  const uint64_t vidx2_hl = (uint64_t) _mm_cvtsi128_si64(vidx2_hi);
    222  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);
    224  const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi);
    288  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);
    290  const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi);
|
D | velu-sse2-rr2-lut16-p3-x4.c | all hits in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4():
     56  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
     57  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
    104  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    105  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
|
D | velu-avx-rr2-lut16-p3-x16.c | all hits in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16():
     61  const uint64_t vidx0_ll = (uint64_t) _mm_cvtsi128_si64(vidx0_lo);
     63  const uint64_t vidx0_hl = (uint64_t) _mm_cvtsi128_si64(vidx0_hi);
     90  const uint64_t vidx1_ll = (uint64_t) _mm_cvtsi128_si64(vidx1_lo);
     92  const uint64_t vidx1_hl = (uint64_t) _mm_cvtsi128_si64(vidx1_hi);
    173  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);
    175  const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi);
    239  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);
    241  const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi);
|
D | velu-avx-rr2-lut16-p3-x8.c | all hits in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8():
     58  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);
     60  const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi);
    124  const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);
    126  const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi);
|
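In the AVX velu kernels the hits come in vidx*_lo/vidx*_hi pairs, which suggests each 256-bit index vector is first split into two 128-bit halves; only the low lane of each half then goes through _mm_cvtsi128_si64, with the other lane presumably read via _mm_extract_epi64 (AVX implies SSE4.1). A C sketch under those assumptions; the split intrinsics do not appear in the hits above:

    #include <immintrin.h>  /* AVX */
    #include <stdint.h>

    static void split256(__m256i vidx, uint64_t out[4]) {
      const __m128i vidx_lo = _mm256_castsi256_si128(vidx);       /* bits 0..127, no instruction */
      const __m128i vidx_hi = _mm256_extractf128_si256(vidx, 1);  /* bits 128..255 */
      out[0] = (uint64_t) _mm_cvtsi128_si64(vidx_lo);    /* the vidx*_ll hits */
      out[1] = (uint64_t) _mm_extract_epi64(vidx_lo, 1);
      out[2] = (uint64_t) _mm_cvtsi128_si64(vidx_hi);    /* the vidx*_hl hits */
      out[3] = (uint64_t) _mm_extract_epi64(vidx_hi, 1);
    }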
D | velu-sse41-rr2-lut16-p3-x20.c | all hits in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20():
     77  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123);
     84  const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567);
     91  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     98  const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF);
    105  const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ);
    248  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    291  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
|
D | velu-sse41-rr2-lut16-p3-x24.c | all hits in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24():
     82  const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123);
     89  const uint64_t vidx45 = (uint64_t) _mm_cvtsi128_si64(vidx4567);
     96  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
    103  const uint64_t vidxCD = (uint64_t) _mm_cvtsi128_si64(vidxCDEF);
    110  const uint64_t vidxGH = (uint64_t) _mm_cvtsi128_si64(vidxGHIJ);
    117  const uint64_t vidxKL = (uint64_t) _mm_cvtsi128_si64(vidxKLMN);
    282  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
    325  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
|
/external/XNNPACK/src/math/ |
D | expm1minus-sse2-rr2-lut16-p3.c | all hits in xnn_math_f32_expm1minus__sse2_rr2_lut16_p3():
     73  const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx);
     74  const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));
|