; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX2OR512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefixes=ALL,AVX2OR512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL

; Unpack-high of the two 128-bit halves of a single 256-bit source.
; The low half (elements 0-1) and high half (elements 2-3) of %x are extracted,
; then the mask <1, 3> picks element 1 of each half, giving <x[1], x[3]>.
; Per the assertions below, the AVX-only run extracts the high half and uses
; vunpckhpd, while the AVX2 and AVX512VL runs use a single vpermpd.
define <2 x i64> @unpckh_unary_extracted_v4i64(<4 x i64> %x) {
; AVX1-LABEL: unpckh_unary_extracted_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: unpckh_unary_extracted_v4i64:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,3,2,3]
; AVX2OR512VL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2OR512VL-NEXT:    vzeroupper
; AVX2OR512VL-NEXT:    retq
  %extrl = shufflevector <4 x i64> %x, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  %extrh = shufflevector <4 x i64> %x, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %r = shufflevector <2 x i64> %extrl, <2 x i64> %extrh, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %r
}

; Floating-point variant of the 64-bit-element unpack-high test: the mask
; <1, 3> over the low and high halves of %x yields <x[1], x[3]>.
; Note that the function name says v8f64 although the source operand is
; <4 x double>; the name is left as-is so the label assertions keep matching.
; Per the assertions below, the AVX-only run extracts the high half and uses
; vunpckhpd, while the AVX2 and AVX512VL runs use a single vpermpd.
define <2 x double> @unpckh_unary_extracted_v8f64(<4 x double> %x) {
; AVX1-LABEL: unpckh_unary_extracted_v8f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: unpckh_unary_extracted_v8f64:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,3,2,3]
; AVX2OR512VL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2OR512VL-NEXT:    vzeroupper
; AVX2OR512VL-NEXT:    retq
  %extrl = shufflevector <4 x double> %x, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  %extrh = shufflevector <4 x double> %x, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  %r = shufflevector <2 x double> %extrl, <2 x double> %extrh, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %r
}

; Using vpermps would require loading a constant for its index operand, so it is unlikely to be profitable.

; Unpack-high of the two 128-bit halves of an <8 x i32> source.
; The mask <2, 6, 3, 7> interleaves elements 2-3 of the low half with
; elements 2-3 of the high half, giving <x[2], x[6], x[3], x[7]>.
; Every run configuration is expected to extract the high half and use
; vunpckhps (see the shared assertions below).
define <4 x i32> @unpckh_unary_extracted_v8i32(<8 x i32> %x) {
; ALL-LABEL: unpckh_unary_extracted_v8i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm1
; ALL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %extrl = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extrh = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %r = shufflevector <4 x i32> %extrl, <4 x i32> %extrh, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ret <4 x i32> %r
}

; Floating-point variant of the 32-bit-element unpack-high test.
; The mask <2, 6, 3, 7> over the low and high halves of %x yields
; <x[2], x[6], x[3], x[7]>.
; Every run configuration is expected to extract the high half and use
; vunpckhps (see the shared assertions below).
define <4 x float> @unpckh_unary_extracted_v8f32(<8 x float> %x) {
; ALL-LABEL: unpckh_unary_extracted_v8f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm1
; ALL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %extrl = shufflevector <8 x float> %x, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extrh = shufflevector <8 x float> %x, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %r = shufflevector <4 x float> %extrl, <4 x float> %extrh, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ret <4 x float> %r
}

; Unpack-high of the two 128-bit halves of a <16 x i16> source.
; The mask <4, 12, 5, 13, 6, 14, 7, 15> interleaves elements 4-7 of the low
; half with elements 4-7 of the high half.
; Both assertion groups expect an extract of the high half followed by
; vpunpckhwd; they differ only in the extract opcode (vextractf128 for the
; AVX-only run, vextracti128 for the AVX2 and AVX512VL runs).
define <8 x i16> @unpckh_unary_extracted_v16i16(<16 x i16> %x) {
; AVX1-LABEL: unpckh_unary_extracted_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: unpckh_unary_extracted_v16i16:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2OR512VL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX2OR512VL-NEXT:    vzeroupper
; AVX2OR512VL-NEXT:    retq
  %extrl = shufflevector <16 x i16> %x, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %extrh = shufflevector <16 x i16> %x, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %r = shufflevector <8 x i16> %extrl, <8 x i16> %extrh, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i16> %r
}

; Unpack-high of the two 128-bit halves of a <32 x i8> source.
; The mask <8, 24, 9, 25, ..., 15, 31> interleaves elements 8-15 of the low
; half with elements 8-15 of the high half.
; Both assertion groups expect an extract of the high half followed by
; vpunpckhbw; they differ only in the extract opcode (vextractf128 for the
; AVX-only run, vextracti128 for the AVX2 and AVX512VL runs).
define <16 x i8> @unpckh_unary_extracted_v32i8(<32 x i8> %x) {
; AVX1-LABEL: unpckh_unary_extracted_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: unpckh_unary_extracted_v32i8:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2OR512VL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX2OR512VL-NEXT:    vzeroupper
; AVX2OR512VL-NEXT:    retq
  %extrl = shufflevector <32 x i8> %x, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %extrh = shufflevector <32 x i8> %x, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %r = shufflevector <16 x i8> %extrl, <16 x i8> %extrh, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ret <16 x i8> %r
}

; Unpack-low of the two 128-bit halves of a single 256-bit source.
; The low half (elements 0-1) and high half (elements 2-3) of %x are extracted,
; then the mask <0, 2> picks element 0 of each half, giving <x[0], x[2]>.
; Per the assertions below, the AVX-only run extracts the high half and uses
; vmovlhps, while the AVX2 and AVX512VL runs use a single vpermpd.
define <2 x i64> @unpckl_unary_extracted_v4i64(<4 x i64> %x) {
; AVX1-LABEL: unpckl_unary_extracted_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: unpckl_unary_extracted_v4i64:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2OR512VL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2OR512VL-NEXT:    vzeroupper
; AVX2OR512VL-NEXT:    retq
  %extrl = shufflevector <4 x i64> %x, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  %extrh = shufflevector <4 x i64> %x, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %r = shufflevector <2 x i64> %extrl, <2 x i64> %extrh, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %r
}

; Floating-point variant of the 64-bit-element unpack-low test: the mask
; <0, 2> over the low and high halves of %x yields <x[0], x[2]>.
; Note that the function name says v8f64 although the source operand is
; <4 x double>; the name is left as-is so the label assertions keep matching.
; Per the assertions below, the AVX-only run extracts the high half and uses
; vmovlhps, while the AVX2 and AVX512VL runs use a single vpermpd.
define <2 x double> @unpckl_unary_extracted_v8f64(<4 x double> %x) {
; AVX1-LABEL: unpckl_unary_extracted_v8f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: unpckl_unary_extracted_v8f64:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2OR512VL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2OR512VL-NEXT:    vzeroupper
; AVX2OR512VL-NEXT:    retq
  %extrl = shufflevector <4 x double> %x, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  %extrh = shufflevector <4 x double> %x, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  %r = shufflevector <2 x double> %extrl, <2 x double> %extrh, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %r
}

; Using vpermps would require loading a constant for its index operand, so it is unlikely to be profitable.

; Unpack-low of the two 128-bit halves of an <8 x i32> source.
; The mask <0, 4, 1, 5> interleaves elements 0-1 of the low half with
; elements 0-1 of the high half, giving <x[0], x[4], x[1], x[5]>.
; Every run configuration is expected to extract the high half and use
; vunpcklps (see the shared assertions below).
define <4 x i32> @unpckl_unary_extracted_v8i32(<8 x i32> %x) {
; ALL-LABEL: unpckl_unary_extracted_v8i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm1
; ALL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %extrl = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extrh = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %r = shufflevector <4 x i32> %extrl, <4 x i32> %extrh, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x i32> %r
}

; Floating-point variant of the 32-bit-element unpack-low test.
; The mask <0, 4, 1, 5> over the low and high halves of %x yields
; <x[0], x[4], x[1], x[5]>.
; Every run configuration is expected to extract the high half and use
; vunpcklps (see the shared assertions below).
define <4 x float> @unpckl_unary_extracted_v8f32(<8 x float> %x) {
; ALL-LABEL: unpckl_unary_extracted_v8f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm1
; ALL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %extrl = shufflevector <8 x float> %x, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extrh = shufflevector <8 x float> %x, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %r = shufflevector <4 x float> %extrl, <4 x float> %extrh, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x float> %r
}

; Unpack-low of the two 128-bit halves of a <16 x i16> source.
; The mask <0, 8, 1, 9, 2, 10, 3, 11> interleaves elements 0-3 of the low
; half with elements 0-3 of the high half.
; Both assertion groups expect an extract of the high half followed by
; vpunpcklwd; they differ only in the extract opcode (vextractf128 for the
; AVX-only run, vextracti128 for the AVX2 and AVX512VL runs).
define <8 x i16> @unpckl_unary_extracted_v16i16(<16 x i16> %x) {
; AVX1-LABEL: unpckl_unary_extracted_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: unpckl_unary_extracted_v16i16:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2OR512VL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2OR512VL-NEXT:    vzeroupper
; AVX2OR512VL-NEXT:    retq
  %extrl = shufflevector <16 x i16> %x, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %extrh = shufflevector <16 x i16> %x, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %r = shufflevector <8 x i16> %extrl, <8 x i16> %extrh, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i16> %r
}

; Unpack-low of the two 128-bit halves of a <32 x i8> source.
; The mask <0, 16, 1, 17, ..., 7, 23> interleaves elements 0-7 of the low
; half with elements 0-7 of the high half.
; Both assertion groups expect an extract of the high half followed by
; vpunpcklbw; they differ only in the extract opcode (vextractf128 for the
; AVX-only run, vextracti128 for the AVX2 and AVX512VL runs).
define <16 x i8> @unpckl_unary_extracted_v32i8(<32 x i8> %x) {
; AVX1-LABEL: unpckl_unary_extracted_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: unpckl_unary_extracted_v32i8:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2OR512VL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX2OR512VL-NEXT:    vzeroupper
; AVX2OR512VL-NEXT:    retq
  %extrl = shufflevector <32 x i8> %x, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %extrh = shufflevector <32 x i8> %x, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %r = shufflevector <16 x i8> %extrl, <16 x i8> %extrh, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ret <16 x i8> %r
}

; This used to loop infinitely because the unpack shuffle mask was not recognized in its commuted form.

; Single 256-bit shuffle whose defined mask elements <4, u, 5, 1> form an
; unpack-low of the two 128-bit halves with the operands commuted (high half
; first); the upper four result elements are undef.
; Every run configuration is expected to extract the high half and emit
; vunpcklps with xmm1 as the first source. The result is returned as a
; 256-bit vector, and the assertions expect no vzeroupper before the return.
define <8 x i32> @extract_unpckl_v8i32(<8 x i32> %a) {
; ALL-LABEL: extract_unpckl_v8i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm1
; ALL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 4, i32 undef, i32 5, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}
