Home
last modified time | relevance | path

Searched refs:vidx0123 (Results 1 – 25 of 119) sorted by relevance

12345

/external/XNNPACK/src/f32-vsigmoid/gen/
Dvsigmoid-sse2-rr2-lut64-p2-div-x8.c52 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8() local
56 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8()
57 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8()
75 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8()
76 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8()
77 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8()
78 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8()
Dvsigmoid-sse41-rr2-lut64-p2-div-x8.c52 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8() local
56 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8()
57 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8()
71 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8()
72 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8()
73 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8()
74 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8()
Dvsigmoid-sse2-rr2-lut64-p2-div-x12.c56 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12() local
61 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12()
62 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12()
89 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12()
90 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12()
91 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12()
92 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12()
Dvsigmoid-sse41-rr2-lut64-p2-div-x12.c56 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12() local
61 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12()
62 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12()
83 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12()
84 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12()
85 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12()
86 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12()
Dvsigmoid-sse2-rr2-lut64-p2-div-x16.c60 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16() local
66 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16()
67 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16()
103 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16()
104 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16()
105 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16()
106 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16()
Dvsigmoid-sse41-rr2-lut64-p2-div-x16.c60 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16() local
66 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16()
67 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16()
95 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16()
96 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16()
97 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16()
98 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16()
Dvsigmoid-sse41-rr2-lut64-p2-div-x20.c64 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20() local
71 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20()
72 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20()
107 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20()
108 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20()
109 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20()
110 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20()
Dvsigmoid-sse2-rr2-lut64-p2-div-x20.c64 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20() local
71 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20()
72 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20()
117 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20()
118 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20()
119 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20()
120 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20()
Dvsigmoid-sse2-rr2-lut64-p2-div-x24.c68 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24() local
76 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24()
77 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24()
131 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24()
132 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24()
133 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24()
134 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24()
Dvsigmoid-sse41-rr2-lut64-p2-div-x24.c68 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24() local
76 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24()
77 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24()
119 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24()
120 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24()
121 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24()
122 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24()
Dvsigmoid-neonfma-rr1-lut64-p2-div-x8.c50 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn0123), vindex_… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() local
53 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
54 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
Dvsigmoid-neonfma-rr1-lut2048-p1-div-x8.c49 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn0123), vindex_… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() local
52 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
53 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
Dvsigmoid-wasmsimd-rr2-lut64-p2-div-x8.c51 const v128_t vidx0123 = wasm_i32x4_shl(wasm_v128_and(vn0123, vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8() local
54 const uint64_t vidx01 = wasm_i64x2_extract_lane(vidx0123, 0); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8()
55 const uint64_t vidx23 = wasm_i64x2_extract_lane(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8()
/external/XNNPACK/src/f32-velu/gen/
Dvelu-sse2-rr2-lut16-p3-x8.c55 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local
61 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
62 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
80 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
81 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
82 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
83 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
Dvelu-sse41-rr2-lut16-p3-x8.c55 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local
61 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
62 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
76 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
77 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
78 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
79 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
Dvelu-sse2-rr2-lut16-p3-x12.c58 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() local
66 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
67 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
94 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
95 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
96 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
97 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
Dvelu-sse41-rr2-lut16-p3-x12.c58 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() local
66 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
67 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
88 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
89 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
90 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
91 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
Dvelu-sse2-rr2-lut16-p3-x16.c61 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() local
71 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
72 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
108 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
109 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
110 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
111 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
Dvelu-sse41-rr2-lut16-p3-x16.c61 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() local
71 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
72 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
100 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
101 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
102 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
103 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
Dvelu-sse2-rr2-lut16-p3-x20.c64 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() local
76 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
77 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
122 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
123 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
124 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
125 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
Dvelu-sse41-rr2-lut16-p3-x20.c64 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20() local
76 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
77 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
112 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
113 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
114 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
115 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
Dvelu-sse2-rr2-lut16-p3-x24.c67 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() local
81 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
82 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
136 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
137 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
138 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
139 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
Dvelu-sse41-rr2-lut16-p3-x24.c67 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() local
81 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
82 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
124 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
125 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
126 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
127 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Dneonfma-rr1-lut64-p2-x8-acc2.c53 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn0123), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8_acc2() local
54 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8_acc2()
55 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8_acc2()
Dneonfma-rr1-lut64-p2-x8.c52 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn0123), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8() local
53 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8()
54 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8()

12345