• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=CHECK

; Merge-masked two-input shuffle <1,2,3,4> of %a/%b: lanes come from the shuffle
; where elements 0-3 of the <8 x i1> cast of %mask are set, else from %passthru.
; Expected: a single masked valignd into the passthru register, then a copy.
define <4 x i32> @mask_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passthru, i8 %mask) {
; CHECK-LABEL: mask_shuffle_v4i32_1234:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    valignd {{.*#+}} xmm2 {%k1} = xmm0[1,2,3],xmm1[0]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
  ret <4 x i32> %res
}

; Zero-masked two-input shuffle <1,2,3,4> of %a/%b: lanes whose mask element
; (elements 0-3 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: a single valignd with {z} masking.
define <4 x i32> @maskz_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: maskz_shuffle_v4i32_1234:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    valignd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3],xmm1[0]
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Merge-masked two-input shuffle <2,3,4,5> of %a/%b: lanes come from the shuffle
; where elements 0-3 of the <8 x i1> cast of %mask are set, else from %passthru.
; Expected: a single masked valignd into the passthru register, then a copy.
define <4 x i32> @mask_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passthru, i8 %mask) {
; CHECK-LABEL: mask_shuffle_v4i32_2345:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    valignd {{.*#+}} xmm2 {%k1} = xmm0[2,3],xmm1[0,1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
  ret <4 x i32> %res
}

; Zero-masked two-input shuffle <2,3,4,5> of %a/%b: lanes whose mask element
; (elements 0-3 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: a single valignd with {z} masking.
define <4 x i32> @maskz_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: maskz_shuffle_v4i32_2345:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    valignd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3],xmm1[0,1]
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Merge-masked two-input shuffle <1,2> of %a/%b on 64-bit lanes: lanes come from
; the shuffle where elements 0-1 of the <8 x i1> cast of %mask are set, else
; from %passthru. Expected: a single masked valignq, then a copy.
define <2 x i64> @mask_shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: mask_shuffle_v2i64_12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    valignq {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
  ret <2 x i64> %res
}

; Zero-masked two-input shuffle <1,2> of %a/%b on 64-bit lanes: lanes whose mask
; element (elements 0-1 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: a single valignq with {z} masking.
define <2 x i64> @maskz_shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: maskz_shuffle_v2i64_12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    valignq {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0]
; CHECK-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
  ret <2 x i64> %res
}

; Merge-masked two-input shuffle <1,2,3,4> of %a/%b on 256-bit vectors of i64:
; lanes come from the shuffle where elements 0-3 of the <8 x i1> cast of %mask
; are set, else from %passthru. Expected: a single masked valignq, then a copy.
define <4 x i64> @mask_shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: mask_shuffle_v4i64_1234:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    valignq {{.*#+}} ymm2 {%k1} = ymm0[1,2,3],ymm1[0]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru
  ret <4 x i64> %res
}

; Zero-masked two-input shuffle <1,2,3,4> of %a/%b on 256-bit vectors of i64:
; lanes whose mask element (elements 0-3 of the <8 x i1> cast of %mask) is
; clear become zero. Expected: a single valignq with {z} masking.
define <4 x i64> @maskz_shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: maskz_shuffle_v4i64_1234:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    valignq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3],ymm1[0]
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer
  ret <4 x i64> %res
}

; Merge-masked single-input rotation <1,2,3,0> of %a: lanes come from the
; shuffle where elements 0-3 of the <8 x i1> cast of %mask are set, else from
; %passthru. Expected: a single masked vpermq, then a copy.
define <4 x i64> @mask_shuffle_v4i64_1230(<4 x i64> %a, <4 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: mask_shuffle_v4i64_1230:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,0]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru
  ret <4 x i64> %res
}

; Zero-masked single-input rotation <1,2,3,0> of %a: lanes whose mask element
; (elements 0-3 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: a single vpermq with {z} masking.
define <4 x i64> @maskz_shuffle_v4i64_1230(<4 x i64> %a, i8 %mask) {
; CHECK-LABEL: maskz_shuffle_v4i64_1230:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,0]
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer
  ret <4 x i64> %res
}

; Merge-masked two-input shuffle <1..8> of %a/%b on 8 x i32: all eight elements
; of the <8 x i1> cast of %mask select between the shuffle and %passthru.
; Expected: a single masked valignd into the passthru register, then a copy.
define <8 x i32> @mask_shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passthru, i8 %mask) {
; CHECK-LABEL: mask_shuffle_v8i32_12345678:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    valignd {{.*#+}} ymm2 {%k1} = ymm0[1,2,3,4,5,6,7],ymm1[0]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
  ret <8 x i32> %res
}

; Zero-masked two-input shuffle <1..8> of %a/%b on 8 x i32: lanes whose element
; in the <8 x i1> cast of %mask is clear become zero.
; Expected: a single valignd with {z} masking.
define <8 x i32> @maskz_shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: maskz_shuffle_v8i32_12345678:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,4,5,6,7],ymm1[0]
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

; Merge-masked two-input shuffle <2..9> of %a/%b on 8 x i32: all eight elements
; of the <8 x i1> cast of %mask select between the shuffle and %passthru.
; Expected: a single masked valignd into the passthru register, then a copy.
define <8 x i32> @mask_shuffle_v8i32_23456789(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passthru, i8 %mask) {
; CHECK-LABEL: mask_shuffle_v8i32_23456789:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    valignd {{.*#+}} ymm2 {%k1} = ymm0[2,3,4,5,6,7],ymm1[0,1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
  ret <8 x i32> %res
}

; Zero-masked two-input shuffle <2..9> of %a/%b on 8 x i32: lanes whose element
; in the <8 x i1> cast of %mask is clear become zero.
; Expected: a single valignd with {z} masking.
define <8 x i32> @maskz_shuffle_v8i32_23456789(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: maskz_shuffle_v8i32_23456789:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,4,5,6,7],ymm1[0,1]
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

; Merge-masked single-input rotation <1,2,3,4,5,6,7,0> of %a: the <8 x i1> cast
; of %mask selects between the rotated vector and %passthru.
; Expected: a single masked valignd reading ymm0 only, then a copy.
define <8 x i32> @mask_shuffle_v8i32_12345670(<8 x i32> %a, <8 x i32> %passthru, i8 %mask) {
; CHECK-LABEL: mask_shuffle_v8i32_12345670:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    valignd {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,4,5,6,7,0]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
  ret <8 x i32> %res
}

; Zero-masked single-input rotation <1,2,3,4,5,6,7,0> of %a: lanes whose element
; in the <8 x i1> cast of %mask is clear become zero.
; Expected: a single valignd with {z} masking.
define <8 x i32> @maskz_shuffle_v8i32_12345670(<8 x i32> %a, i8 %mask) {
; CHECK-LABEL: maskz_shuffle_v8i32_12345670:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,4,5,6,7,0]
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

; Merge-masked single-input rotation <2,3,4,5,6,7,0,1> of %a (a rotation by a
; whole 64-bit element). The recorded output does NOT fold the mask into the
; shuffle: it is an unmasked vpermq followed by a separate vpblendmd.
define <8 x i32> @mask_shuffle_v8i32_23456701(<8 x i32> %a, <8 x i32> %passthru, i8 %mask) {
; CHECK-LABEL: mask_shuffle_v8i32_23456701:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0]
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
  ret <8 x i32> %res
}

; Zero-masked single-input rotation <2,3,4,5,6,7,0,1> of %a (a rotation by a
; whole 64-bit element). The recorded output does NOT fold the mask into the
; shuffle: it is an unmasked vpermq followed by a zero-masked vmovdqa32.
define <8 x i32> @maskz_shuffle_v8i32_23456701(<8 x i32> %a, i8 %mask) {
; CHECK-LABEL: maskz_shuffle_v8i32_23456701:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0]
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

; Merge-masked extraction of elements 0-3 of an 8 x i32 vector, selected against
; %passthru by elements 0-3 of the <8 x i1> cast of %mask.
; Expected: no extract instruction, only a masked blend on the xmm registers.
define <4 x i32> @mask_extract_v8i32_v4i32_0(<8 x i32> %a, <4 x i32> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v8i32_v4i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
  ret <4 x i32> %res
}

; Zero-masked extraction of elements 0-3 of an 8 x i32 vector; lanes whose mask
; element (elements 0-3 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: no extract instruction, only a zero-masked vmovdqa32 on xmm0.
define <4 x i32> @mask_extract_v8i32_v4i32_0_z(<8 x i32> %a, i8 %mask) {
; CHECK-LABEL: mask_extract_v8i32_v4i32_0_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Merge-masked extraction of elements 4-7 of an 8 x i32 vector, selected against
; %passthru by elements 0-3 of the <8 x i1> cast of %mask.
; Expected: a single masked vextracti32x4 $1 into the passthru register, then a copy.
define <4 x i32> @mask_extract_v8i32_v4i32_1(<8 x i32> %a, <4 x i32> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v8i32_v4i32_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextracti32x4 $1, %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
  ret <4 x i32> %res
}

; Zero-masked extraction of elements 4-7 of an 8 x i32 vector; lanes whose mask
; element (elements 0-3 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: a single vextracti32x4 $1 with {z} masking.
define <4 x i32> @mask_extract_v8i32_v4i32_1_z(<8 x i32> %a, i8 %mask) {
; CHECK-LABEL: mask_extract_v8i32_v4i32_1_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Merge-masked extraction of elements 0-3 of an 8 x float vector, selected
; against %passthru by elements 0-3 of the <8 x i1> cast of %mask.
; Expected: no extract instruction, only a masked vblendmps on the xmm registers.
define <4 x float> @mask_extract_v8f32_v4f32_0(<8 x float> %a, <4 x float> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v8f32_v4f32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
  ret <4 x float> %res
}

; Zero-masked extraction of elements 0-3 of an 8 x float vector; lanes whose
; mask element (elements 0-3 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: no extract instruction, only a zero-masked vmovaps on xmm0.
define <4 x float> @mask_extract_v8f32_v4f32_0_z(<8 x float> %a, i8 %mask) {
; CHECK-LABEL: mask_extract_v8f32_v4f32_0_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer
  ret <4 x float> %res
}

; Merge-masked extraction of elements 4-7 of an 8 x float vector, selected
; against %passthru by elements 0-3 of the <8 x i1> cast of %mask.
; Expected: a single masked vextractf32x4 $1 into the passthru register, then a copy.
define <4 x float> @mask_extract_v8f32_v4f32_1(<8 x float> %a, <4 x float> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v8f32_v4f32_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextractf32x4 $1, %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
  ret <4 x float> %res
}

; Zero-masked extraction of elements 4-7 of an 8 x float vector; lanes whose
; mask element (elements 0-3 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: a single vextractf32x4 $1 with {z} masking.
define <4 x float> @mask_extract_v8f32_v4f32_1_z(<8 x float> %a, i8 %mask) {
; CHECK-LABEL: mask_extract_v8f32_v4f32_1_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer
  ret <4 x float> %res
}

; Merge-masked extraction of elements 0-1 of a 4 x i64 vector, selected against
; %passthru by elements 0-1 of the <8 x i1> cast of %mask.
; Expected: no extract instruction, only a masked vpblendmq on the xmm registers.
define <2 x i64> @mask_extract_v4i64_v2i64_0(<4 x i64> %a, <2 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v4i64_v2i64_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
  ret <2 x i64> %res
}

; Zero-masked extraction of elements 0-1 of a 4 x i64 vector; lanes whose mask
; element (elements 0-1 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: no extract instruction, only a zero-masked vmovdqa64 on xmm0.
define <2 x i64> @mask_extract_v4i64_v2i64_0_z(<4 x i64> %a, i8 %mask) {
; CHECK-LABEL: mask_extract_v4i64_v2i64_0_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
  ret <2 x i64> %res
}

; Merge-masked extraction of elements 2-3 of a 4 x i64 vector, selected against
; %passthru by elements 0-1 of the <8 x i1> cast of %mask.
; Expected: a single masked vextracti64x2 $1 into the passthru register, then a copy.
define <2 x i64> @mask_extract_v4i64_v2i64_1(<4 x i64> %a, <2 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v4i64_v2i64_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextracti64x2 $1, %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
  ret <2 x i64> %res
}

; Zero-masked extraction of elements 2-3 of a 4 x i64 vector; lanes whose mask
; element (elements 0-1 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: a single vextracti64x2 $1 with {z} masking.
define <2 x i64> @mask_extract_v4i64_v2i64_1_z(<4 x i64> %a, i8 %mask) {
; CHECK-LABEL: mask_extract_v4i64_v2i64_1_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
  ret <2 x i64> %res
}

; Merge-masked extraction of elements 0-1 of a 4 x double vector, selected
; against %passthru by elements 0-1 of the <8 x i1> cast of %mask.
; Expected: no extract instruction, only a masked vblendmpd on the xmm registers.
define <2 x double> @mask_extract_v4f64_v2f64_0(<4 x double> %a, <2 x double> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v4f64_v2f64_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru
  ret <2 x double> %res
}

; Zero-masked extraction of elements 0-1 of a 4 x double vector; lanes whose
; mask element (elements 0-1 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: no extract instruction, only a zero-masked vmovapd on xmm0.
define <2 x double> @mask_extract_v4f64_v2f64_0_z(<4 x double> %a, i8 %mask) {
; CHECK-LABEL: mask_extract_v4f64_v2f64_0_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer
  ret <2 x double> %res
}

; Merge-masked extraction of elements 2-3 of a 4 x double vector, selected
; against %passthru by elements 0-1 of the <8 x i1> cast of %mask.
; Expected: a single masked vextractf64x2 $1 into the passthru register, then a copy.
define <2 x double> @mask_extract_v4f64_v2f64_1(<4 x double> %a, <2 x double> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v4f64_v2f64_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextractf64x2 $1, %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru
  ret <2 x double> %res
}

; Zero-masked extraction of elements 2-3 of a 4 x double vector; lanes whose
; mask element (elements 0-1 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: a single vextractf64x2 $1 with {z} masking.
define <2 x double> @mask_extract_v4f64_v2f64_1_z(<4 x double> %a, i8 %mask) {
; CHECK-LABEL: mask_extract_v4f64_v2f64_1_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer
  ret <2 x double> %res
}

; Merge-masked extraction of elements 0-3 of a 16 x i32 vector, selected against
; %passthru by elements 0-3 of the <8 x i1> cast of %mask.
; Expected: no extract instruction, only a masked vpblendmd on the xmm registers.
define <4 x i32> @mask_extract_v16i32_v4i32_0(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v16i32_v4i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
  ret <4 x i32> %res
}

; Zero-masked extraction of elements 0-3 of a 16 x i32 vector; lanes whose mask
; element (elements 0-3 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: no extract instruction, only a zero-masked vmovdqa32 on xmm0.
define <4 x i32> @mask_extract_v16i32_v4i32_0_z(<16 x i32> %a, i8 %mask) {
; CHECK-LABEL: mask_extract_v16i32_v4i32_0_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Merge-masked extraction of elements 4-7 of a 16 x i32 vector, selected against
; %passthru by elements 0-3 of the <8 x i1> cast of %mask.
; Expected: a single masked vextracti32x4 $1 from zmm0, then a copy.
define <4 x i32> @mask_extract_v16i32_v4i32_1(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v16i32_v4i32_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextracti32x4 $1, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
  ret <4 x i32> %res
}

; Zero-masked extraction of elements 4-7 of a 16 x i32 vector; lanes whose mask
; element (elements 0-3 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: a single vextracti32x4 $1 from zmm0 with {z} masking.
define <4 x i32> @mask_extract_v16i32_v4i32_1_z(<16 x i32> %a, i8 %mask) {
; CHECK-LABEL: mask_extract_v16i32_v4i32_1_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextracti32x4 $1, %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Merge-masked extraction of elements 8-11 of a 16 x i32 vector, selected
; against %passthru by elements 0-3 of the <8 x i1> cast of %mask.
; Expected: a single masked vextracti32x4 $2 from zmm0, then a copy.
define <4 x i32> @mask_extract_v16i32_v4i32_2(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v16i32_v4i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextracti32x4 $2, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
  ret <4 x i32> %res
}

; Merge-masked extraction of elements 12-15 of a 16 x i32 vector, selected
; against %passthru by elements 0-3 of the <8 x i1> cast of %mask.
; Expected: a single masked vextracti32x4 $3 from zmm0, then a copy.
define <4 x i32> @mask_extract_v16i32_v4i32_3(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v16i32_v4i32_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextracti32x4 $3, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
  ret <4 x i32> %res
}

; Merge-masked extraction of elements 0-3 of a 16 x float vector, selected
; against %passthru by elements 0-3 of the <8 x i1> cast of %mask.
; Expected: no extract instruction, only a masked vblendmps on the xmm registers.
define <4 x float> @mask_extract_v16f32_v4f32_0(<16 x float> %a, <4 x float> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v16f32_v4f32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
  ret <4 x float> %res
}

; Zero-masked extraction of elements 0-3 of a 16 x float vector; lanes whose
; mask element (elements 0-3 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: no extract instruction, only a zero-masked vmovaps on xmm0.
define <4 x float> @mask_extract_v16f32_v4f32_0_z(<16 x float> %a, i8 %mask) {
; CHECK-LABEL: mask_extract_v16f32_v4f32_0_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer
  ret <4 x float> %res
}

; Merge-masked extraction of elements 4-7 of a 16 x float vector, selected
; against %passthru by elements 0-3 of the <8 x i1> cast of %mask.
; Expected: a single masked vextractf32x4 $1 from zmm0, then a copy.
define <4 x float> @mask_extract_v16f32_v4f32_1(<16 x float> %a, <4 x float> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v16f32_v4f32_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextractf32x4 $1, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
  ret <4 x float> %res
}

; Zero-masked extraction of elements 4-7 of a 16 x float vector; lanes whose
; mask element (elements 0-3 of the <8 x i1> cast of %mask) is clear become zero.
; Expected: a single vextractf32x4 $1 from zmm0 with {z} masking.
define <4 x float> @mask_extract_v16f32_v4f32_1_z(<16 x float> %a, i8 %mask) {
; CHECK-LABEL: mask_extract_v16f32_v4f32_1_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer
  ret <4 x float> %res
}

; Merge-masked extraction of elements 8-11 of a 16 x float vector, selected
; against %passthru by elements 0-3 of the <8 x i1> cast of %mask.
; Expected: a single masked vextractf32x4 $2 from zmm0, then a copy.
define <4 x float> @mask_extract_v16f32_v4f32_2(<16 x float> %a, <4 x float> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v16f32_v4f32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextractf32x4 $2, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
  ret <4 x float> %res
}

; Merge-masked extraction of elements 12-15 of a 16 x float vector, selected
; against %passthru by elements 0-3 of the <8 x i1> cast of %mask.
; Expected: a single masked vextractf32x4 $3 from zmm0, then a copy.
define <4 x float> @mask_extract_v16f32_v4f32_3(<16 x float> %a, <4 x float> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v16f32_v4f32_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vextractf32x4 $3, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
  ret <4 x float> %res
}

; Merge-masked extraction of elements 0-7 of a 16 x i32 vector, selected against
; %passthru by the full <8 x i1> cast of %mask.
; Expected: no extract instruction, only a masked vpblendmd on the ymm registers.
define <8 x i32> @mask_extract_v16i32_v8i32_0(<16 x i32> %a, <8 x i32> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v16i32_v8i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
  ret <8 x i32> %res
}

; Zero-masked extract of the low half: elements 0-7 of the <16 x i32> %a are
; selected per lane against zeroinitializer, using all 8 lanes of %mask bitcast
; to <8 x i1>. The CHECK lines expect a zero-masked ({z}) vmovdqa32 of ymm0
; onto itself and no extract instruction.
654define <8 x i32> @mask_extract_v16i32_v8i32_0_z(<16 x i32> %a, i8 %mask) {
655; CHECK-LABEL: mask_extract_v16i32_v8i32_0_z:
656; CHECK:       # %bb.0:
657; CHECK-NEXT:    kmovd %edi, %k1
658; CHECK-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
659; CHECK-NEXT:    retq
660  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
661  %mask.cast = bitcast i8 %mask to <8 x i1>
662  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
663  ret <8 x i32> %res
664}
665
; Merge-masked extract of the high half: elements 8-15 of the <16 x i32> %a are
; selected per lane against %passthru, using all 8 lanes of %mask bitcast to
; <8 x i1>. The CHECK lines expect a single masked vextracti32x8 $1 into ymm1
; followed by a move of ymm1 into ymm0.
666define <8 x i32> @mask_extract_v16i32_v8i32_1(<16 x i32> %a, <8 x i32> %passthru, i8 %mask) {
667; CHECK-LABEL: mask_extract_v16i32_v8i32_1:
668; CHECK:       # %bb.0:
669; CHECK-NEXT:    kmovd %edi, %k1
670; CHECK-NEXT:    vextracti32x8 $1, %zmm0, %ymm1 {%k1}
671; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
672; CHECK-NEXT:    retq
673  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
674  %mask.cast = bitcast i8 %mask to <8 x i1>
675  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
676  ret <8 x i32> %res
677}
678
; Zero-masked extract of the high half: elements 8-15 of the <16 x i32> %a are
; selected per lane against zeroinitializer, using all 8 lanes of %mask bitcast
; to <8 x i1>. The CHECK lines expect a single zero-masked ({z}) vextracti32x8
; $1 written straight to ymm0.
679define <8 x i32> @mask_extract_v16i32_v8i32_1_z(<16 x i32> %a, i8 %mask) {
680; CHECK-LABEL: mask_extract_v16i32_v8i32_1_z:
681; CHECK:       # %bb.0:
682; CHECK-NEXT:    kmovd %edi, %k1
683; CHECK-NEXT:    vextracti32x8 $1, %zmm0, %ymm0 {%k1} {z}
684; CHECK-NEXT:    retq
685  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
686  %mask.cast = bitcast i8 %mask to <8 x i1>
687  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
688  ret <8 x i32> %res
689}
690
; Merge-masked extract of the low half: elements 0-7 of the <16 x float> %a are
; selected per lane against %passthru, using all 8 lanes of %mask bitcast to
; <8 x i1>. The CHECK lines expect a masked vblendmps on ymm0/ymm1 and no
; extract instruction.
691define <8 x float> @mask_extract_v16f32_v8f32_0(<16 x float> %a, <8 x float> %passthru, i8 %mask) {
692; CHECK-LABEL: mask_extract_v16f32_v8f32_0:
693; CHECK:       # %bb.0:
694; CHECK-NEXT:    kmovd %edi, %k1
695; CHECK-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
696; CHECK-NEXT:    retq
697  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
698  %mask.cast = bitcast i8 %mask to <8 x i1>
699  %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> %passthru
700  ret <8 x float> %res
701}
702
; Zero-masked extract of the low half: elements 0-7 of the <16 x float> %a are
; selected per lane against zeroinitializer, using all 8 lanes of %mask bitcast
; to <8 x i1>. The CHECK lines expect a zero-masked ({z}) vmovaps of ymm0 onto
; itself and no extract instruction.
703define <8 x float> @mask_extract_v16f32_v8f32_0_z(<16 x float> %a, i8 %mask) {
704; CHECK-LABEL: mask_extract_v16f32_v8f32_0_z:
705; CHECK:       # %bb.0:
706; CHECK-NEXT:    kmovd %edi, %k1
707; CHECK-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
708; CHECK-NEXT:    retq
709  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
710  %mask.cast = bitcast i8 %mask to <8 x i1>
711  %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> zeroinitializer
712  ret <8 x float> %res
713}
714
; Merge-masked extract of the high half: elements 8-15 of the <16 x float> %a
; are selected per lane against %passthru, using all 8 lanes of %mask bitcast
; to <8 x i1>. The CHECK lines expect a single masked vextractf32x8 $1 into
; ymm1 followed by a move of ymm1 into ymm0.
715define <8 x float> @mask_extract_v16f32_v8f32_1(<16 x float> %a, <8 x float> %passthru, i8 %mask) {
716; CHECK-LABEL: mask_extract_v16f32_v8f32_1:
717; CHECK:       # %bb.0:
718; CHECK-NEXT:    kmovd %edi, %k1
719; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm1 {%k1}
720; CHECK-NEXT:    vmovaps %ymm1, %ymm0
721; CHECK-NEXT:    retq
722  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
723  %mask.cast = bitcast i8 %mask to <8 x i1>
724  %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> %passthru
725  ret <8 x float> %res
726}
727
; Zero-masked extract of the high half: elements 8-15 of the <16 x float> %a
; are selected per lane against zeroinitializer, using all 8 lanes of %mask
; bitcast to <8 x i1>. The CHECK lines expect a single zero-masked ({z})
; vextractf32x8 $1 written straight to ymm0.
728define <8 x float> @mask_extract_v16f32_v8f32_1_z(<16 x float> %a, i8 %mask) {
729; CHECK-LABEL: mask_extract_v16f32_v8f32_1_z:
730; CHECK:       # %bb.0:
731; CHECK-NEXT:    kmovd %edi, %k1
732; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z}
733; CHECK-NEXT:    retq
734  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
735  %mask.cast = bitcast i8 %mask to <8 x i1>
736  %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> zeroinitializer
737  ret <8 x float> %res
738}
739
; Merge-masked extract of the lowest 2 elements: elements 0-1 of the <8 x i64>
; %a are selected per lane against %passthru, using lanes 0-1 of %mask bitcast
; to <8 x i1>. The CHECK lines expect a masked vpblendmq on xmm0/xmm1 and no
; extract instruction.
740define <2 x i64> @mask_extract_v8i64_v2i64_0(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) {
741; CHECK-LABEL: mask_extract_v8i64_v2i64_0:
742; CHECK:       # %bb.0:
743; CHECK-NEXT:    kmovd %edi, %k1
744; CHECK-NEXT:    vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
745; CHECK-NEXT:    vzeroupper
746; CHECK-NEXT:    retq
747  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
748  %mask.cast = bitcast i8 %mask to <8 x i1>
749  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
750  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
751  ret <2 x i64> %res
752}
753
; Zero-masked extract of the lowest 2 elements: elements 0-1 of the <8 x i64>
; %a are selected per lane against zeroinitializer, using lanes 0-1 of %mask
; bitcast to <8 x i1>. The CHECK lines expect a zero-masked ({z}) vmovdqa64 of
; xmm0 onto itself and no extract instruction.
754define <2 x i64> @mask_extract_v8i64_v2i64_0_z(<8 x i64> %a, i8 %mask) {
755; CHECK-LABEL: mask_extract_v8i64_v2i64_0_z:
756; CHECK:       # %bb.0:
757; CHECK-NEXT:    kmovd %edi, %k1
758; CHECK-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
759; CHECK-NEXT:    vzeroupper
760; CHECK-NEXT:    retq
761  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
762  %mask.cast = bitcast i8 %mask to <8 x i1>
763  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
764  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
765  ret <2 x i64> %res
766}
767
; Merge-masked subvector extract: elements 2-3 of the <8 x i64> %a are selected
; per lane against %passthru, using lanes 0-1 of %mask bitcast to <8 x i1>.
; The CHECK lines expect a single masked vextracti64x2 $1 into xmm1 followed by
; a move of xmm1 into xmm0.
768define <2 x i64> @mask_extract_v8i64_v2i64_1(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) {
769; CHECK-LABEL: mask_extract_v8i64_v2i64_1:
770; CHECK:       # %bb.0:
771; CHECK-NEXT:    kmovd %edi, %k1
772; CHECK-NEXT:    vextracti64x2 $1, %zmm0, %xmm1 {%k1}
773; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
774; CHECK-NEXT:    vzeroupper
775; CHECK-NEXT:    retq
776  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
777  %mask.cast = bitcast i8 %mask to <8 x i1>
778  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
779  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
780  ret <2 x i64> %res
781}
782
; Zero-masked subvector extract: elements 2-3 of the <8 x i64> %a are selected
; per lane against zeroinitializer, using lanes 0-1 of %mask bitcast to
; <8 x i1>. The CHECK lines expect a single zero-masked ({z}) vextracti64x2 $1
; written straight to xmm0.
783define <2 x i64> @mask_extract_v8i64_v2i64_1_z(<8 x i64> %a, i8 %mask) {
784; CHECK-LABEL: mask_extract_v8i64_v2i64_1_z:
785; CHECK:       # %bb.0:
786; CHECK-NEXT:    kmovd %edi, %k1
787; CHECK-NEXT:    vextracti64x2 $1, %zmm0, %xmm0 {%k1} {z}
788; CHECK-NEXT:    vzeroupper
789; CHECK-NEXT:    retq
790  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
791  %mask.cast = bitcast i8 %mask to <8 x i1>
792  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
793  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
794  ret <2 x i64> %res
795}
796
; Merge-masked subvector extract: elements 4-5 of the <8 x i64> %a are selected
; per lane against %passthru, using lanes 0-1 of %mask bitcast to <8 x i1>.
; The CHECK lines expect a single masked vextracti64x2 $2 into xmm1 followed by
; a move of xmm1 into xmm0.
797define <2 x i64> @mask_extract_v8i64_v2i64_2(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) {
798; CHECK-LABEL: mask_extract_v8i64_v2i64_2:
799; CHECK:       # %bb.0:
800; CHECK-NEXT:    kmovd %edi, %k1
801; CHECK-NEXT:    vextracti64x2 $2, %zmm0, %xmm1 {%k1}
802; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
803; CHECK-NEXT:    vzeroupper
804; CHECK-NEXT:    retq
805  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 4, i32 5>
806  %mask.cast = bitcast i8 %mask to <8 x i1>
807  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
808  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
809  ret <2 x i64> %res
810}
811
; Merge-masked subvector extract: elements 6-7 of the <8 x i64> %a are selected
; per lane against %passthru, using lanes 0-1 of %mask bitcast to <8 x i1>.
; The CHECK lines expect a single masked vextracti64x2 $3 into xmm1 followed by
; a move of xmm1 into xmm0.
812define <2 x i64> @mask_extract_v8i64_v2i64_3(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) {
813; CHECK-LABEL: mask_extract_v8i64_v2i64_3:
814; CHECK:       # %bb.0:
815; CHECK-NEXT:    kmovd %edi, %k1
816; CHECK-NEXT:    vextracti64x2 $3, %zmm0, %xmm1 {%k1}
817; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
818; CHECK-NEXT:    vzeroupper
819; CHECK-NEXT:    retq
820  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
821  %mask.cast = bitcast i8 %mask to <8 x i1>
822  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
823  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
824  ret <2 x i64> %res
825}
826
; Merge-masked extract of the lowest 2 elements: elements 0-1 of the
; <8 x double> %a are selected per lane against %passthru, using lanes 0-1 of
; %mask bitcast to <8 x i1>. The CHECK lines expect a masked vblendmpd on
; xmm0/xmm1 and no extract instruction.
827define <2 x double> @mask_extract_v8f64_v2f64_0(<8 x double> %a, <2 x double> %passthru, i8 %mask) {
828; CHECK-LABEL: mask_extract_v8f64_v2f64_0:
829; CHECK:       # %bb.0:
830; CHECK-NEXT:    kmovd %edi, %k1
831; CHECK-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
832; CHECK-NEXT:    vzeroupper
833; CHECK-NEXT:    retq
834  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
835  %mask.cast = bitcast i8 %mask to <8 x i1>
836  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
837  %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru
838  ret <2 x double> %res
839}
840
; Zero-masked extract of the lowest 2 elements: elements 0-1 of the
; <8 x double> %a are selected per lane against zeroinitializer, using lanes
; 0-1 of %mask bitcast to <8 x i1>. The CHECK lines expect a zero-masked ({z})
; vmovapd of xmm0 onto itself and no extract instruction.
841define <2 x double> @mask_extract_v8f64_v2f64_0_z(<8 x double> %a, i8 %mask) {
842; CHECK-LABEL: mask_extract_v8f64_v2f64_0_z:
843; CHECK:       # %bb.0:
844; CHECK-NEXT:    kmovd %edi, %k1
845; CHECK-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
846; CHECK-NEXT:    vzeroupper
847; CHECK-NEXT:    retq
848  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
849  %mask.cast = bitcast i8 %mask to <8 x i1>
850  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
851  %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer
852  ret <2 x double> %res
853}
854
; Merge-masked subvector extract: elements 2-3 of the <8 x double> %a are
; selected per lane against %passthru, using lanes 0-1 of %mask bitcast to
; <8 x i1>. The CHECK lines expect a single masked vextractf64x2 $1 into xmm1
; followed by a move of xmm1 into xmm0.
855define <2 x double> @mask_extract_v8f64_v2f64_1(<8 x double> %a, <2 x double> %passthru, i8 %mask) {
856; CHECK-LABEL: mask_extract_v8f64_v2f64_1:
857; CHECK:       # %bb.0:
858; CHECK-NEXT:    kmovd %edi, %k1
859; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm1 {%k1}
860; CHECK-NEXT:    vmovapd %xmm1, %xmm0
861; CHECK-NEXT:    vzeroupper
862; CHECK-NEXT:    retq
863  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3>
864  %mask.cast = bitcast i8 %mask to <8 x i1>
865  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
866  %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru
867  ret <2 x double> %res
868}
869
; Zero-masked subvector extract: elements 2-3 of the <8 x double> %a are
; selected per lane against zeroinitializer, using lanes 0-1 of %mask bitcast
; to <8 x i1>. The CHECK lines expect a single zero-masked ({z}) vextractf64x2
; $1 written straight to xmm0.
870define <2 x double> @mask_extract_v8f64_v2f64_1_z(<8 x double> %a, i8 %mask) {
871; CHECK-LABEL: mask_extract_v8f64_v2f64_1_z:
872; CHECK:       # %bb.0:
873; CHECK-NEXT:    kmovd %edi, %k1
874; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z}
875; CHECK-NEXT:    vzeroupper
876; CHECK-NEXT:    retq
877  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3>
878  %mask.cast = bitcast i8 %mask to <8 x i1>
879  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
880  %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer
881  ret <2 x double> %res
882}
883
; Merge-masked subvector extract: elements 4-5 of the <8 x double> %a are
; selected per lane against %passthru, using lanes 0-1 of %mask bitcast to
; <8 x i1>. The CHECK lines expect a single masked vextractf64x2 $2 into xmm1
; followed by a move of xmm1 into xmm0.
884define <2 x double> @mask_extract_v8f64_v2f64_2(<8 x double> %a, <2 x double> %passthru, i8 %mask) {
885; CHECK-LABEL: mask_extract_v8f64_v2f64_2:
886; CHECK:       # %bb.0:
887; CHECK-NEXT:    kmovd %edi, %k1
888; CHECK-NEXT:    vextractf64x2 $2, %zmm0, %xmm1 {%k1}
889; CHECK-NEXT:    vmovapd %xmm1, %xmm0
890; CHECK-NEXT:    vzeroupper
891; CHECK-NEXT:    retq
892  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 4, i32 5>
893  %mask.cast = bitcast i8 %mask to <8 x i1>
894  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
895  %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru
896  ret <2 x double> %res
897}
898
; Merge-masked subvector extract: elements 6-7 of the <8 x double> %a are
; selected per lane against %passthru, using lanes 0-1 of %mask bitcast to
; <8 x i1>. The CHECK lines expect a single masked vextractf64x2 $3 into xmm1
; followed by a move of xmm1 into xmm0.
899define <2 x double> @mask_extract_v8f64_v2f64_3(<8 x double> %a, <2 x double> %passthru, i8 %mask) {
900; CHECK-LABEL: mask_extract_v8f64_v2f64_3:
901; CHECK:       # %bb.0:
902; CHECK-NEXT:    kmovd %edi, %k1
903; CHECK-NEXT:    vextractf64x2 $3, %zmm0, %xmm1 {%k1}
904; CHECK-NEXT:    vmovapd %xmm1, %xmm0
905; CHECK-NEXT:    vzeroupper
906; CHECK-NEXT:    retq
907  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 6, i32 7>
908  %mask.cast = bitcast i8 %mask to <8 x i1>
909  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
910  %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru
911  ret <2 x double> %res
912}
913
; Merge-masked extract of the low half: elements 0-3 of the <8 x i64> %a are
; selected per lane against %passthru, using lanes 0-3 of %mask bitcast to
; <8 x i1>. The CHECK lines expect a masked vpblendmq on ymm0/ymm1 and no
; extract instruction.
914define <4 x i64> @mask_extract_v8i64_v4i64_0(<8 x i64> %a, <4 x i64> %passthru, i8 %mask) {
915; CHECK-LABEL: mask_extract_v8i64_v4i64_0:
916; CHECK:       # %bb.0:
917; CHECK-NEXT:    kmovd %edi, %k1
918; CHECK-NEXT:    vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
919; CHECK-NEXT:    retq
920  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
921  %mask.cast = bitcast i8 %mask to <8 x i1>
922  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
923  %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru
924  ret <4 x i64> %res
925}
926
; Zero-masked extract of the low half: elements 0-3 of the <8 x i64> %a are
; selected per lane against zeroinitializer, using lanes 0-3 of %mask bitcast
; to <8 x i1>. The CHECK lines expect a zero-masked ({z}) vmovdqa64 of ymm0
; onto itself and no extract instruction.
927define <4 x i64> @mask_extract_v8i64_v4i64_0_z(<8 x i64> %a, i8 %mask) {
928; CHECK-LABEL: mask_extract_v8i64_v4i64_0_z:
929; CHECK:       # %bb.0:
930; CHECK-NEXT:    kmovd %edi, %k1
931; CHECK-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
932; CHECK-NEXT:    retq
933  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
934  %mask.cast = bitcast i8 %mask to <8 x i1>
935  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
936  %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer
937  ret <4 x i64> %res
938}
939
; Merge-masked extract of the high half: elements 4-7 of the <8 x i64> %a are
; selected per lane against %passthru, using lanes 0-3 of %mask bitcast to
; <8 x i1>. The CHECK lines expect a single masked vextracti64x4 $1 into ymm1
; followed by a move of ymm1 into ymm0.
940define <4 x i64> @mask_extract_v8i64_v4i64_1(<8 x i64> %a, <4 x i64> %passthru, i8 %mask) {
941; CHECK-LABEL: mask_extract_v8i64_v4i64_1:
942; CHECK:       # %bb.0:
943; CHECK-NEXT:    kmovd %edi, %k1
944; CHECK-NEXT:    vextracti64x4 $1, %zmm0, %ymm1 {%k1}
945; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
946; CHECK-NEXT:    retq
947  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
948  %mask.cast = bitcast i8 %mask to <8 x i1>
949  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
950  %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru
951  ret <4 x i64> %res
952}
953
; Zero-masked extract of the high half: elements 4-7 of the <8 x i64> %a are
; selected per lane against zeroinitializer, using lanes 0-3 of %mask bitcast
; to <8 x i1>. The CHECK lines expect a single zero-masked ({z}) vextracti64x4
; $1 written straight to ymm0.
954define <4 x i64> @mask_extract_v8i64_v4i64_1_z(<8 x i64> %a, i8 %mask) {
955; CHECK-LABEL: mask_extract_v8i64_v4i64_1_z:
956; CHECK:       # %bb.0:
957; CHECK-NEXT:    kmovd %edi, %k1
958; CHECK-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 {%k1} {z}
959; CHECK-NEXT:    retq
960  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
961  %mask.cast = bitcast i8 %mask to <8 x i1>
962  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
963  %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer
964  ret <4 x i64> %res
965}
966
; Merge-masked extract of the low half: elements 0-3 of the <8 x double> %a are
; selected per lane against %passthru, using lanes 0-3 of %mask bitcast to
; <8 x i1>. The CHECK lines expect a masked vblendmpd on ymm0/ymm1 and no
; extract instruction.
967define <4 x double> @mask_extract_v8f64_v4f64_0(<8 x double> %a, <4 x double> %passthru, i8 %mask) {
968; CHECK-LABEL: mask_extract_v8f64_v4f64_0:
969; CHECK:       # %bb.0:
970; CHECK-NEXT:    kmovd %edi, %k1
971; CHECK-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
972; CHECK-NEXT:    retq
973  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
974  %mask.cast = bitcast i8 %mask to <8 x i1>
975  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
976  %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> %passthru
977  ret <4 x double> %res
978}
979
; Zero-masked extract of the low half: elements 0-3 of the <8 x double> %a are
; selected per lane against zeroinitializer, using lanes 0-3 of %mask bitcast
; to <8 x i1>. The CHECK lines expect a zero-masked ({z}) vmovapd of ymm0 onto
; itself and no extract instruction.
980define <4 x double> @mask_extract_v8f64_v4f64_0_z(<8 x double> %a, i8 %mask) {
981; CHECK-LABEL: mask_extract_v8f64_v4f64_0_z:
982; CHECK:       # %bb.0:
983; CHECK-NEXT:    kmovd %edi, %k1
984; CHECK-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
985; CHECK-NEXT:    retq
986  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
987  %mask.cast = bitcast i8 %mask to <8 x i1>
988  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
989  %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> zeroinitializer
990  ret <4 x double> %res
991}
992
; Merge-masked extract of the high half: elements 4-7 of the <8 x double> %a
; are selected per lane against %passthru, using lanes 0-3 of %mask bitcast to
; <8 x i1>. The CHECK lines expect a single masked vextractf64x4 $1 into ymm1
; followed by a move of ymm1 into ymm0.
993define <4 x double> @mask_extract_v8f64_v4f64_1(<8 x double> %a, <4 x double> %passthru, i8 %mask) {
994; CHECK-LABEL: mask_extract_v8f64_v4f64_1:
995; CHECK:       # %bb.0:
996; CHECK-NEXT:    kmovd %edi, %k1
997; CHECK-NEXT:    vextractf64x4 $1, %zmm0, %ymm1 {%k1}
998; CHECK-NEXT:    vmovapd %ymm1, %ymm0
999; CHECK-NEXT:    retq
1000  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1001  %mask.cast = bitcast i8 %mask to <8 x i1>
1002  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1003  %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> %passthru
1004  ret <4 x double> %res
1005}
1006
; Zero-masked extract of the high half: elements 4-7 of the <8 x double> %a
; are selected per lane against zeroinitializer, using lanes 0-3 of %mask
; bitcast to <8 x i1>. The CHECK lines expect a single zero-masked ({z})
; vextractf64x4 $1 written straight to ymm0.
1007define <4 x double> @mask_extract_v8f64_v4f64_1_z(<8 x double> %a, i8 %mask) {
1008; CHECK-LABEL: mask_extract_v8f64_v4f64_1_z:
1009; CHECK:       # %bb.0:
1010; CHECK-NEXT:    kmovd %edi, %k1
1011; CHECK-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
1012; CHECK-NEXT:    retq
1013  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1014  %mask.cast = bitcast i8 %mask to <8 x i1>
1015  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1016  %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> zeroinitializer
1017  ret <4 x double> %res
1018}
1019
; Merge-masked extract through a bitcast: elements 0-3 of the <8 x i64> %a are
; extracted, bitcast to <8 x i32>, and selected per i32 lane against %passthru
; using all 8 lanes of %mask bitcast to <8 x i1>. The CHECK lines expect a
; masked vpblendmd (32-bit element form) on ymm0/ymm1 and no extract.
1020define <8 x i32> @mask_cast_extract_v8i64_v8i32_0(<8 x i64> %a, <8 x i32> %passthru, i8 %mask) {
1021; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_0:
1022; CHECK:       # %bb.0:
1023; CHECK-NEXT:    kmovd %edi, %k1
1024; CHECK-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
1025; CHECK-NEXT:    retq
1026  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1027  %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32>
1028  %mask.cast = bitcast i8 %mask to <8 x i1>
1029  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> %passthru
1030  ret <8 x i32> %res
1031}
1032
; Zero-masked extract through a bitcast: elements 0-3 of the <8 x i64> %a are
; extracted, bitcast to <8 x i32>, and selected per i32 lane against
; zeroinitializer using all 8 lanes of %mask bitcast to <8 x i1>. The CHECK
; lines expect a zero-masked ({z}) vmovdqa32 of ymm0 onto itself and no extract.
1033define <8 x i32> @mask_cast_extract_v8i64_v8i32_0_z(<8 x i64> %a, i8 %mask) {
1034; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_0_z:
1035; CHECK:       # %bb.0:
1036; CHECK-NEXT:    kmovd %edi, %k1
1037; CHECK-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
1038; CHECK-NEXT:    retq
1039  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1040  %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32>
1041  %mask.cast = bitcast i8 %mask to <8 x i1>
1042  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> zeroinitializer
1043  ret <8 x i32> %res
1044}
1045
; Merge-masked extract through a bitcast: elements 4-7 of the <8 x i64> %a are
; extracted, bitcast to <8 x i32>, and selected per i32 lane against %passthru
; using all 8 lanes of %mask bitcast to <8 x i1>. The CHECK lines expect the
; 32-bit element form, a masked vextracti32x8 $1 into ymm1, followed by a move
; of ymm1 into ymm0.
1046define <8 x i32> @mask_cast_extract_v8i64_v8i32_1(<8 x i64> %a, <8 x i32> %passthru, i8 %mask) {
1047; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_1:
1048; CHECK:       # %bb.0:
1049; CHECK-NEXT:    kmovd %edi, %k1
1050; CHECK-NEXT:    vextracti32x8 $1, %zmm0, %ymm1 {%k1}
1051; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
1052; CHECK-NEXT:    retq
1053  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1054  %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32>
1055  %mask.cast = bitcast i8 %mask to <8 x i1>
1056  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> %passthru
1057  ret <8 x i32> %res
1058}
1059
; Zero-masked extract through a bitcast: elements 4-7 of the <8 x i64> %a are
; extracted, bitcast to <8 x i32>, and selected per i32 lane against
; zeroinitializer using all 8 lanes of %mask bitcast to <8 x i1>. The CHECK
; lines expect a single zero-masked ({z}) vextracti32x8 $1 written to ymm0.
1060define <8 x i32> @mask_cast_extract_v8i64_v8i32_1_z(<8 x i64> %a, i8 %mask) {
1061; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_1_z:
1062; CHECK:       # %bb.0:
1063; CHECK-NEXT:    kmovd %edi, %k1
1064; CHECK-NEXT:    vextracti32x8 $1, %zmm0, %ymm0 {%k1} {z}
1065; CHECK-NEXT:    retq
1066  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1067  %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32>
1068  %mask.cast = bitcast i8 %mask to <8 x i1>
1069  %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> zeroinitializer
1070  ret <8 x i32> %res
1071}
1072
; Merge-masked extract through a bitcast: elements 0-3 of the <8 x double> %a
; are extracted, bitcast to <8 x float>, and selected per float lane against
; %passthru using all 8 lanes of %mask bitcast to <8 x i1>. The CHECK lines
; expect a masked vblendmps (32-bit element form) on ymm0/ymm1 and no extract.
1073define <8 x float> @mask_cast_extract_v8f64_v8f32_0(<8 x double> %a, <8 x float> %passthru, i8 %mask) {
1074; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_0:
1075; CHECK:       # %bb.0:
1076; CHECK-NEXT:    kmovd %edi, %k1
1077; CHECK-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
1078; CHECK-NEXT:    retq
1079  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1080  %shuffle.cast = bitcast <4 x double> %shuffle to <8 x float>
1081  %mask.cast = bitcast i8 %mask to <8 x i1>
1082  %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> %passthru
1083  ret <8 x float> %res
1084}
1085
; Zero-masked extract through a bitcast: elements 0-3 of the <8 x double> %a
; are extracted, bitcast to <8 x float>, and selected per float lane against
; zeroinitializer using all 8 lanes of %mask bitcast to <8 x i1>. The CHECK
; lines expect a zero-masked ({z}) vmovaps of ymm0 onto itself and no extract.
1086define <8 x float> @mask_cast_extract_v8f64_v8f32_0_z(<8 x double> %a, i8 %mask) {
1087; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_0_z:
1088; CHECK:       # %bb.0:
1089; CHECK-NEXT:    kmovd %edi, %k1
1090; CHECK-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
1091; CHECK-NEXT:    retq
1092  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1093  %shuffle.cast = bitcast <4 x double> %shuffle to <8 x float>
1094  %mask.cast = bitcast i8 %mask to <8 x i1>
1095  %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> zeroinitializer
1096  ret <8 x float> %res
1097}
1098
; Merge-masked extract through a bitcast: elements 4-7 of the <8 x double> %a
; are extracted, bitcast to <8 x float>, and selected per float lane against
; %passthru using all 8 lanes of %mask bitcast to <8 x i1>. The CHECK lines
; expect the 32-bit element form, a masked vextractf32x8 $1 into ymm1, followed
; by a move of ymm1 into ymm0.
1099define <8 x float> @mask_cast_extract_v8f64_v8f32_1(<8 x double> %a, <8 x float> %passthru, i8 %mask) {
1100; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_1:
1101; CHECK:       # %bb.0:
1102; CHECK-NEXT:    kmovd %edi, %k1
1103; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm1 {%k1}
1104; CHECK-NEXT:    vmovaps %ymm1, %ymm0
1105; CHECK-NEXT:    retq
1106  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1107  %shuffle.cast = bitcast <4 x double> %shuffle to <8 x float>
1108  %mask.cast = bitcast i8 %mask to <8 x i1>
1109  %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> %passthru
1110  ret <8 x float> %res
1111}
1112
; Zero-masked extract through a bitcast: elements 4-7 of the <8 x double> %a
; are extracted, bitcast to <8 x float>, and selected per float lane against
; zeroinitializer using all 8 lanes of %mask bitcast to <8 x i1>. The CHECK
; lines expect a single zero-masked ({z}) vextractf32x8 $1 written to ymm0.
1113define <8 x float> @mask_cast_extract_v8f64_v8f32_1_z(<8 x double> %a, i8 %mask) {
1114; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_1_z:
1115; CHECK:       # %bb.0:
1116; CHECK-NEXT:    kmovd %edi, %k1
1117; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z}
1118; CHECK-NEXT:    retq
1119  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1120  %shuffle.cast = bitcast <4 x double> %shuffle to <8 x float>
1121  %mask.cast = bitcast i8 %mask to <8 x i1>
1122  %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> zeroinitializer
1123  ret <8 x float> %res
1124}
1125
; Merge-masked extract through a bitcast: elements 0-1 of the <8 x i64> %a are
; extracted, bitcast to <4 x i32>, and selected per i32 lane against %passthru
; using lanes 0-3 of %mask bitcast to <8 x i1>. The CHECK lines expect a masked
; vpblendmd (32-bit element form) on xmm0/xmm1 and no extract.
1126define <4 x i32> @mask_cast_extract_v8i64_v4i32_0(<8 x i64> %a, <4 x i32> %passthru, i8 %mask) {
1127; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_0:
1128; CHECK:       # %bb.0:
1129; CHECK-NEXT:    kmovd %edi, %k1
1130; CHECK-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
1131; CHECK-NEXT:    vzeroupper
1132; CHECK-NEXT:    retq
1133  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
1134  %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32>
1135  %mask.cast = bitcast i8 %mask to <8 x i1>
1136  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1137  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> %passthru
1138  ret <4 x i32> %res
1139}
1140
; Zero-masked extract through a bitcast: elements 0-1 of the <8 x i64> %a are
; extracted, bitcast to <4 x i32>, and selected per i32 lane against
; zeroinitializer using lanes 0-3 of %mask bitcast to <8 x i1>. The CHECK lines
; expect a zero-masked ({z}) vmovdqa32 of xmm0 onto itself and no extract.
1141define <4 x i32> @mask_cast_extract_v8i64_v4i32_0_z(<8 x i64> %a, i8 %mask) {
1142; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_0_z:
1143; CHECK:       # %bb.0:
1144; CHECK-NEXT:    kmovd %edi, %k1
1145; CHECK-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
1146; CHECK-NEXT:    vzeroupper
1147; CHECK-NEXT:    retq
1148  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
1149  %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32>
1150  %mask.cast = bitcast i8 %mask to <8 x i1>
1151  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1152  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> zeroinitializer
1153  ret <4 x i32> %res
1154}
1155
; Merge-masked extract through a bitcast: elements 2-3 of the <8 x i64> %a are
; extracted, bitcast to <4 x i32>, and selected per i32 lane against %passthru
; using lanes 0-3 of %mask bitcast to <8 x i1>. The CHECK lines expect the
; 32-bit element form, a masked vextracti32x4 $1 into xmm1, followed by a move
; of xmm1 into xmm0.
1156define <4 x i32> @mask_cast_extract_v8i64_v4i32_1(<8 x i64> %a, <4 x i32> %passthru, i8 %mask) {
1157; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_1:
1158; CHECK:       # %bb.0:
1159; CHECK-NEXT:    kmovd %edi, %k1
1160; CHECK-NEXT:    vextracti32x4 $1, %zmm0, %xmm1 {%k1}
1161; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
1162; CHECK-NEXT:    vzeroupper
1163; CHECK-NEXT:    retq
1164  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
1165  %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32>
1166  %mask.cast = bitcast i8 %mask to <8 x i1>
1167  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1168  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> %passthru
1169  ret <4 x i32> %res
1170}
1171
; Zero-masked extract through a bitcast: elements 2-3 of the <8 x i64> %a are
; extracted, bitcast to <4 x i32>, and selected per i32 lane against
; zeroinitializer using lanes 0-3 of %mask bitcast to <8 x i1>. The CHECK lines
; expect a single zero-masked ({z}) vextracti32x4 $1 written to xmm0.
1172define <4 x i32> @mask_cast_extract_v8i64_v4i32_1_z(<8 x i64> %a, i8 %mask) {
1173; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_1_z:
1174; CHECK:       # %bb.0:
1175; CHECK-NEXT:    kmovd %edi, %k1
1176; CHECK-NEXT:    vextracti32x4 $1, %zmm0, %xmm0 {%k1} {z}
1177; CHECK-NEXT:    vzeroupper
1178; CHECK-NEXT:    retq
1179  %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
1180  %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32>
1181  %mask.cast = bitcast i8 %mask to <8 x i1>
1182  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1183  %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> zeroinitializer
1184  ret <4 x i32> %res
1185}
1186
; Merge-masked extract through a bitcast: elements 0-1 of the <8 x double> %a
; are extracted, bitcast to <4 x float>, and selected per float lane against
; %passthru using lanes 0-3 of %mask bitcast to <8 x i1>. The CHECK lines
; expect a masked vblendmps (32-bit element form) on xmm0/xmm1 and no extract.
1187define <4 x float> @mask_cast_extract_v8f64_v4f32_0(<8 x double> %a, <4 x float> %passthru, i8 %mask) {
1188; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_0:
1189; CHECK:       # %bb.0:
1190; CHECK-NEXT:    kmovd %edi, %k1
1191; CHECK-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
1192; CHECK-NEXT:    vzeroupper
1193; CHECK-NEXT:    retq
1194  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1195  %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float>
1196  %mask.cast = bitcast i8 %mask to <8 x i1>
1197  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1198  %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> %passthru
1199  ret <4 x float> %res
1200}
1201
; Zero-masked extract through a bitcast: elements 0-1 of the <8 x double> %a
; are extracted, bitcast to <4 x float>, and selected per float lane against
; zeroinitializer using lanes 0-3 of %mask bitcast to <8 x i1>. The CHECK lines
; expect a zero-masked ({z}) vmovaps of xmm0 onto itself and no extract.
1202define <4 x float> @mask_cast_extract_v8f64_v4f32_0_z(<8 x double> %a, i8 %mask) {
1203; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_0_z:
1204; CHECK:       # %bb.0:
1205; CHECK-NEXT:    kmovd %edi, %k1
1206; CHECK-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
1207; CHECK-NEXT:    vzeroupper
1208; CHECK-NEXT:    retq
1209  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1210  %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float>
1211  %mask.cast = bitcast i8 %mask to <8 x i1>
1212  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1213  %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> zeroinitializer
1214  ret <4 x float> %res
1215}
1216
; Merge-masked extract through a bitcast: elements 2-3 of the <8 x double> %a
; are extracted, bitcast to <4 x float>, and selected per float lane against
; %passthru using lanes 0-3 of %mask bitcast to <8 x i1>. The CHECK lines
; expect the 32-bit element form, a masked vextractf32x4 $1 into xmm1, followed
; by a move of xmm1 into xmm0.
1217define <4 x float> @mask_cast_extract_v8f64_v4f32_1(<8 x double> %a, <4 x float> %passthru, i8 %mask) {
1218; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_1:
1219; CHECK:       # %bb.0:
1220; CHECK-NEXT:    kmovd %edi, %k1
1221; CHECK-NEXT:    vextractf32x4 $1, %zmm0, %xmm1 {%k1}
1222; CHECK-NEXT:    vmovaps %xmm1, %xmm0
1223; CHECK-NEXT:    vzeroupper
1224; CHECK-NEXT:    retq
1225  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1226  %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float>
1227  %mask.cast = bitcast i8 %mask to <8 x i1>
1228  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1229  %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> %passthru
1230  ret <4 x float> %res
1231}
1232
; Zero-masked extract through a bitcast: elements 2-3 of the <8 x double> %a
; are extracted, bitcast to <4 x float>, and selected per float lane against
; zeroinitializer using lanes 0-3 of %mask bitcast to <8 x i1>. The CHECK lines
; expect a single zero-masked ({z}) vextractf32x4 $1 written to xmm0.
1233define <4 x float> @mask_cast_extract_v8f64_v4f32_1_z(<8 x double> %a, i8 %mask) {
1234; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_1_z:
1235; CHECK:       # %bb.0:
1236; CHECK-NEXT:    kmovd %edi, %k1
1237; CHECK-NEXT:    vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
1238; CHECK-NEXT:    vzeroupper
1239; CHECK-NEXT:    retq
1240  %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1241  %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float>
1242  %mask.cast = bitcast i8 %mask to <8 x i1>
1243  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1244  %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> zeroinitializer
1245  ret <4 x float> %res
1246}
1247
; Merge-masked extract through a bitcast: elements 0-7 of the <16 x i32> %a are
; extracted, bitcast to <4 x i64>, and selected per i64 lane against %passthru
; using lanes 0-3 of %mask bitcast to <8 x i1>. The CHECK lines expect a masked
; vpblendmq (64-bit element form) on ymm0/ymm1 and no extract.
1248define <4 x i64> @mask_cast_extract_v16i32_v4i64_0(<16 x i32> %a, <4 x i64> %passthru, i8 %mask) {
1249; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_0:
1250; CHECK:       # %bb.0:
1251; CHECK-NEXT:    kmovd %edi, %k1
1252; CHECK-NEXT:    vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
1253; CHECK-NEXT:    retq
1254  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1255  %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64>
1256  %mask.cast = bitcast i8 %mask to <8 x i1>
1257  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1258  %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> %passthru
1259  ret <4 x i64> %res
1260}
1261
; Zero-masked extract through a bitcast: elements 0-7 of the <16 x i32> %a are
; extracted, bitcast to <4 x i64>, and selected per i64 lane against
; zeroinitializer using lanes 0-3 of %mask bitcast to <8 x i1>. The CHECK lines
; expect a zero-masked ({z}) vmovdqa64 of ymm0 onto itself and no extract.
1262define <4 x i64> @mask_cast_extract_v16i32_v4i64_0_z(<16 x i32> %a, i8 %mask) {
1263; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_0_z:
1264; CHECK:       # %bb.0:
1265; CHECK-NEXT:    kmovd %edi, %k1
1266; CHECK-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
1267; CHECK-NEXT:    retq
1268  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1269  %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64>
1270  %mask.cast = bitcast i8 %mask to <8 x i1>
1271  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1272  %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> zeroinitializer
1273  ret <4 x i64> %res
1274}
1275
; Merge-masked extract through a bitcast: elements 8-15 of the <16 x i32> %a
; are extracted, bitcast to <4 x i64>, and selected per i64 lane against
; %passthru using lanes 0-3 of %mask bitcast to <8 x i1>. The CHECK lines
; expect the 64-bit element form, a masked vextracti64x4 $1 into ymm1, followed
; by a move of ymm1 into ymm0.
1276define <4 x i64> @mask_cast_extract_v16i32_v4i64_1(<16 x i32> %a, <4 x i64> %passthru, i8 %mask) {
1277; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_1:
1278; CHECK:       # %bb.0:
1279; CHECK-NEXT:    kmovd %edi, %k1
1280; CHECK-NEXT:    vextracti64x4 $1, %zmm0, %ymm1 {%k1}
1281; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
1282; CHECK-NEXT:    retq
1283  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1284  %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64>
1285  %mask.cast = bitcast i8 %mask to <8 x i1>
1286  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1287  %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> %passthru
1288  ret <4 x i64> %res
1289}
1290
; Zero-masked extract through a bitcast: elements 8-15 of the <16 x i32> %a
; are extracted, bitcast to <4 x i64>, and selected per i64 lane against
; zeroinitializer using lanes 0-3 of %mask bitcast to <8 x i1>. The CHECK lines
; expect a single zero-masked ({z}) vextracti64x4 $1 written to ymm0.
1291define <4 x i64> @mask_cast_extract_v16i32_v4i64_1_z(<16 x i32> %a, i8 %mask) {
1292; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_1_z:
1293; CHECK:       # %bb.0:
1294; CHECK-NEXT:    kmovd %edi, %k1
1295; CHECK-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 {%k1} {z}
1296; CHECK-NEXT:    retq
1297  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1298  %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64>
1299  %mask.cast = bitcast i8 %mask to <8 x i1>
1300  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1301  %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> zeroinitializer
1302  ret <4 x i64> %res
1303}
1304
; Merge-masking case: takes elements 0-7 of %a (the low 256 bits), bitcasts them
; to <4 x double>, and selects against %passthru using the low 4 bits of %mask.
; The CHECK lines expect a single masked vblendmpd on ymm registers.
1305define <4 x double> @mask_cast_extract_v16f32_v4f64_0(<16 x float> %a, <4 x double> %passthru, i8 %mask) {
1306; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_0:
1307; CHECK:       # %bb.0:
1308; CHECK-NEXT:    kmovd %edi, %k1
1309; CHECK-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
1310; CHECK-NEXT:    retq
1311  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1312  %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double>
1313  %mask.cast = bitcast i8 %mask to <8 x i1>
1314  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1315  %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> %passthru
1316  ret <4 x double> %res
1317}
1318
; Zero-masking case: takes elements 0-7 of %a (the low 256 bits), bitcasts them
; to <4 x double>, and selects against zero using the low 4 bits of %mask.
; The CHECK lines expect a single zero-masked vmovapd on ymm0.
1319define <4 x double> @mask_cast_extract_v16f32_v4f64_0_z(<16 x float> %a, i8 %mask) {
1320; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_0_z:
1321; CHECK:       # %bb.0:
1322; CHECK-NEXT:    kmovd %edi, %k1
1323; CHECK-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
1324; CHECK-NEXT:    retq
1325  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1326  %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double>
1327  %mask.cast = bitcast i8 %mask to <8 x i1>
1328  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1329  %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> zeroinitializer
1330  ret <4 x double> %res
1331}
1332
; Merge-masking case: takes elements 8-15 of %a (the high 256 bits), bitcasts
; them to <4 x double>, and selects against %passthru using the low 4 bits of
; %mask. The CHECK lines expect a masked vextractf64x4 $1 into the passthru
; register, followed by a copy to ymm0.
1333define <4 x double> @mask_cast_extract_v16f32_v4f64_1(<16 x float> %a, <4 x double> %passthru, i8 %mask) {
1334; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_1:
1335; CHECK:       # %bb.0:
1336; CHECK-NEXT:    kmovd %edi, %k1
1337; CHECK-NEXT:    vextractf64x4 $1, %zmm0, %ymm1 {%k1}
1338; CHECK-NEXT:    vmovapd %ymm1, %ymm0
1339; CHECK-NEXT:    retq
1340  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1341  %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double>
1342  %mask.cast = bitcast i8 %mask to <8 x i1>
1343  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1344  %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> %passthru
1345  ret <4 x double> %res
1346}
1347
; Zero-masking case: takes elements 8-15 of %a (the high 256 bits), bitcasts
; them to <4 x double>, and selects against zero using the low 4 bits of %mask.
; The CHECK lines expect a single zero-masked vextractf64x4 $1.
1348define <4 x double> @mask_cast_extract_v16f32_v4f64_1_z(<16 x float> %a, i8 %mask) {
1349; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_1_z:
1350; CHECK:       # %bb.0:
1351; CHECK-NEXT:    kmovd %edi, %k1
1352; CHECK-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
1353; CHECK-NEXT:    retq
1354  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1355  %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double>
1356  %mask.cast = bitcast i8 %mask to <8 x i1>
1357  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1358  %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> zeroinitializer
1359  ret <4 x double> %res
1360}
1361
; Merge-masking case: takes elements 0-3 of %a (the low 128 bits), bitcasts them
; to <2 x i64>, and selects against %passthru using the low 2 bits of %mask.
; The CHECK lines expect a masked vpblendmq on xmm registers, then vzeroupper.
1362define <2 x i64> @mask_cast_extract_v16i32_v2i64_0(<16 x i32> %a, <2 x i64> %passthru, i8 %mask) {
1363; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_0:
1364; CHECK:       # %bb.0:
1365; CHECK-NEXT:    kmovd %edi, %k1
1366; CHECK-NEXT:    vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
1367; CHECK-NEXT:    vzeroupper
1368; CHECK-NEXT:    retq
1369  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1370  %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64>
1371  %mask.cast = bitcast i8 %mask to <8 x i1>
1372  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1373  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> %passthru
1374  ret <2 x i64> %res
1375}
1376
; Zero-masking case: takes elements 0-3 of %a (the low 128 bits), bitcasts them
; to <2 x i64>, and selects against zero using the low 2 bits of %mask.
; The CHECK lines expect a zero-masked vmovdqa64 on xmm0, then vzeroupper.
1377define <2 x i64> @mask_cast_extract_v16i32_v2i64_0_z(<16 x i32> %a, i8 %mask) {
1378; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_0_z:
1379; CHECK:       # %bb.0:
1380; CHECK-NEXT:    kmovd %edi, %k1
1381; CHECK-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
1382; CHECK-NEXT:    vzeroupper
1383; CHECK-NEXT:    retq
1384  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1385  %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64>
1386  %mask.cast = bitcast i8 %mask to <8 x i1>
1387  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1388  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> zeroinitializer
1389  ret <2 x i64> %res
1390}
1391
; Merge-masking case: takes elements 4-7 of %a (the second 128-bit chunk),
; bitcasts them to <2 x i64>, and selects against %passthru using the low 2 bits
; of %mask. The CHECK lines expect a masked vextracti64x2 $1 into the passthru
; register, a copy to xmm0, then vzeroupper.
1392define <2 x i64> @mask_cast_extract_v16i32_v2i64_1(<16 x i32> %a, <2 x i64> %passthru, i8 %mask) {
1393; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_1:
1394; CHECK:       # %bb.0:
1395; CHECK-NEXT:    kmovd %edi, %k1
1396; CHECK-NEXT:    vextracti64x2 $1, %zmm0, %xmm1 {%k1}
1397; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
1398; CHECK-NEXT:    vzeroupper
1399; CHECK-NEXT:    retq
1400  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1401  %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64>
1402  %mask.cast = bitcast i8 %mask to <8 x i1>
1403  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1404  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> %passthru
1405  ret <2 x i64> %res
1406}
1407
; Zero-masking case: takes elements 4-7 of %a (the second 128-bit chunk),
; bitcasts them to <2 x i64>, and selects against zero using the low 2 bits of
; %mask. The CHECK lines expect a zero-masked vextracti64x2 $1, then vzeroupper.
1408define <2 x i64> @mask_cast_extract_v16i32_v2i64_1_z(<16 x i32> %a, i8 %mask) {
1409; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_1_z:
1410; CHECK:       # %bb.0:
1411; CHECK-NEXT:    kmovd %edi, %k1
1412; CHECK-NEXT:    vextracti64x2 $1, %zmm0, %xmm0 {%k1} {z}
1413; CHECK-NEXT:    vzeroupper
1414; CHECK-NEXT:    retq
1415  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1416  %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64>
1417  %mask.cast = bitcast i8 %mask to <8 x i1>
1418  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1419  %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> zeroinitializer
1420  ret <2 x i64> %res
1421}
1422
; Merge-masking case: takes elements 0-3 of %a (the low 128 bits), bitcasts them
; to <2 x double>, and selects against %passthru using the low 2 bits of %mask.
; The CHECK lines expect a masked vblendmpd on xmm registers, then vzeroupper.
1423define <2 x double> @mask_cast_extract_v16f32_v2f64_0(<16 x float> %a, <2 x double> %passthru, i8 %mask) {
1424; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_0:
1425; CHECK:       # %bb.0:
1426; CHECK-NEXT:    kmovd %edi, %k1
1427; CHECK-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
1428; CHECK-NEXT:    vzeroupper
1429; CHECK-NEXT:    retq
1430  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1431  %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double>
1432  %mask.cast = bitcast i8 %mask to <8 x i1>
1433  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1434  %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> %passthru
1435  ret <2 x double> %res
1436}
1437
; Zero-masking case: takes elements 0-3 of %a (the low 128 bits), bitcasts them
; to <2 x double>, and selects against zero using the low 2 bits of %mask.
; The CHECK lines expect a zero-masked vmovapd on xmm0, then vzeroupper.
1438define <2 x double> @mask_cast_extract_v16f32_v2f64_0_z(<16 x float> %a, i8 %mask) {
1439; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_0_z:
1440; CHECK:       # %bb.0:
1441; CHECK-NEXT:    kmovd %edi, %k1
1442; CHECK-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
1443; CHECK-NEXT:    vzeroupper
1444; CHECK-NEXT:    retq
1445  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1446  %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double>
1447  %mask.cast = bitcast i8 %mask to <8 x i1>
1448  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1449  %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> zeroinitializer
1450  ret <2 x double> %res
1451}
1452
; Merge-masking case: takes elements 4-7 of %a (the second 128-bit chunk),
; bitcasts them to <2 x double>, and selects against %passthru using the low
; 2 bits of %mask. The CHECK lines expect a masked vextractf64x2 $1 into the
; passthru register, a copy to xmm0, then vzeroupper.
1453define <2 x double> @mask_cast_extract_v16f32_v2f64_1(<16 x float> %a, <2 x double> %passthru, i8 %mask) {
1454; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_1:
1455; CHECK:       # %bb.0:
1456; CHECK-NEXT:    kmovd %edi, %k1
1457; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm1 {%k1}
1458; CHECK-NEXT:    vmovapd %xmm1, %xmm0
1459; CHECK-NEXT:    vzeroupper
1460; CHECK-NEXT:    retq
1461  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1462  %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double>
1463  %mask.cast = bitcast i8 %mask to <8 x i1>
1464  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1465  %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> %passthru
1466  ret <2 x double> %res
1467}
1468
; Zero-masking case: takes elements 4-7 of %a (the second 128-bit chunk),
; bitcasts them to <2 x double>, and selects against zero using the low 2 bits
; of %mask. The CHECK lines expect a zero-masked vextractf64x2 $1, then
; vzeroupper.
1469define <2 x double> @mask_cast_extract_v16f32_v2f64_1_z(<16 x float> %a, i8 %mask) {
1470; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_1_z:
1471; CHECK:       # %bb.0:
1472; CHECK-NEXT:    kmovd %edi, %k1
1473; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z}
1474; CHECK-NEXT:    vzeroupper
1475; CHECK-NEXT:    retq
1476  %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1477  %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double>
1478  %mask.cast = bitcast i8 %mask to <8 x i1>
1479  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1480  %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> zeroinitializer
1481  ret <2 x double> %res
1482}
1483
; Merge-masking case: loads one double from %x (align 1), inserts it into both
; lanes of a <2 x double>, and selects against %passthru using the low 2 bits of
; %mask. The CHECK lines expect a single masked vmovddup from memory.
; NOTE(review): the function name mentions v4f32/v2f32, but the IR operates on
; double / <2 x double>.
1484define <2 x double> @broadcast_v4f32_0101_from_v2f32_mask(double* %x, <2 x double> %passthru, i8 %mask) {
1485; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_mask:
1486; CHECK:       # %bb.0:
1487; CHECK-NEXT:    kmovd %esi, %k1
1488; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
1489; CHECK-NEXT:    retq
1490  %q = load double, double* %x, align 1
1491  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
1492  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
1493  %mask.cast = bitcast i8 %mask to <8 x i1>
1494  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1495  %res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> %passthru
1496  ret <2 x double> %res
1497}
1498
; Zero-masking case: loads one double from %x (align 1), inserts it into both
; lanes of a <2 x double>, and selects against zero using the low 2 bits of
; %mask. The CHECK lines expect a single zero-masked vmovddup from memory.
; NOTE(review): the function name mentions v4f32/v2f32, but the IR operates on
; double / <2 x double>.
1499define <2 x double> @broadcast_v4f32_0101_from_v2f32_maskz(double* %x, i8 %mask) {
1500; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_maskz:
1501; CHECK:       # %bb.0:
1502; CHECK-NEXT:    kmovd %esi, %k1
1503; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0]
1504; CHECK-NEXT:    retq
1505  %q = load double, double* %x, align 1
1506  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
1507  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
1508  %mask.cast = bitcast i8 %mask to <8 x i1>
1509  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1510  %res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> zeroinitializer
1511  ret <2 x double> %res
1512}
1513
; Merge-masking case: loads a <2 x double>, repeats it twice to form
; <4 x double>, bitcasts to <8 x float>, and selects against %passthru with all
; 8 bits of %mask. The mask granularity is therefore 32-bit; the CHECK lines
; expect a masked vbroadcastf32x4 from memory into ymm0.
1514define <8 x float> @test_broadcast_2f64_8f32_mask(<2 x double> *%p, i8 %mask, <8 x float> %passthru) nounwind {
1515; CHECK-LABEL: test_broadcast_2f64_8f32_mask:
1516; CHECK:       # %bb.0:
1517; CHECK-NEXT:    kmovd %esi, %k1
1518; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
1519; CHECK-NEXT:    retq
1520 %1 = load <2 x double>, <2 x double> *%p
1521 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1522 %3 = bitcast <4 x double> %2 to <8 x float>
1523 %mask.cast = bitcast i8 %mask to <8 x i1>
1524 %res = select <8 x i1> %mask.cast, <8 x float> %3, <8 x float> %passthru
1525 ret <8 x float> %res
1526}
1527
; Zero-masking case: loads a <2 x double>, repeats it twice to form
; <4 x double>, bitcasts to <8 x float>, and selects against zero with all
; 8 bits of %mask. The CHECK lines expect a zero-masked vbroadcastf32x4 from
; memory into ymm0.
1528define <8 x float> @test_broadcast_2f64_8f32_maskz(<2 x double> *%p, i8 %mask) nounwind {
1529; CHECK-LABEL: test_broadcast_2f64_8f32_maskz:
1530; CHECK:       # %bb.0:
1531; CHECK-NEXT:    kmovd %esi, %k1
1532; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
1533; CHECK-NEXT:    retq
1534 %1 = load <2 x double>, <2 x double> *%p
1535 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1536 %3 = bitcast <4 x double> %2 to <8 x float>
1537 %mask.cast = bitcast i8 %mask to <8 x i1>
1538 %res = select <8 x i1> %mask.cast, <8 x float> %3, <8 x float> zeroinitializer
1539 ret <8 x float> %res
1540}
1541
; Merge-masking case: loads a <2 x i64>, repeats it twice to form <4 x i64>,
; bitcasts to <8 x i32>, and selects against %passthru with all 8 bits of
; %mask. The CHECK lines expect a masked vbroadcasti32x4 from memory into ymm0.
1542define <8 x i32> @test_broadcast_2i64_8i32_mask(<2 x i64> *%p, i8 %mask, <8 x i32> %passthru) nounwind {
1543; CHECK-LABEL: test_broadcast_2i64_8i32_mask:
1544; CHECK:       # %bb.0:
1545; CHECK-NEXT:    kmovd %esi, %k1
1546; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
1547; CHECK-NEXT:    retq
1548 %1 = load <2 x i64>, <2 x i64> *%p
1549 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1550 %3 = bitcast <4 x i64> %2 to <8 x i32>
1551 %mask.cast = bitcast i8 %mask to <8 x i1>
1552 %res = select <8 x i1> %mask.cast, <8 x i32> %3, <8 x i32> %passthru
1553 ret <8 x i32> %res
1554}
1555
; Zero-masking case: loads a <2 x i64>, repeats it twice to form <4 x i64>,
; bitcasts to <8 x i32>, and selects against zero with all 8 bits of %mask.
; The CHECK lines expect a zero-masked vbroadcasti32x4 from memory into ymm0.
1556define <8 x i32> @test_broadcast_2i64_8i32_maskz(<2 x i64> *%p, i8 %mask) nounwind {
1557; CHECK-LABEL: test_broadcast_2i64_8i32_maskz:
1558; CHECK:       # %bb.0:
1559; CHECK-NEXT:    kmovd %esi, %k1
1560; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
1561; CHECK-NEXT:    retq
1562 %1 = load <2 x i64>, <2 x i64> *%p
1563 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1564 %3 = bitcast <4 x i64> %2 to <8 x i32>
1565 %mask.cast = bitcast i8 %mask to <8 x i1>
1566 %res = select <8 x i1> %mask.cast, <8 x i32> %3, <8 x i32> zeroinitializer
1567 ret <8 x i32> %res
1568}
1569
; Merge-masking case: loads a <2 x double>, repeats it four times to form
; <8 x double>, bitcasts to <16 x float>, and selects against %passthru with
; the 16-bit %mask. The CHECK lines expect a masked vbroadcastf32x4 from memory
; into zmm0.
1570define <16 x float> @test_broadcast_2f64_16f32_mask(<2 x double> *%p, i16 %mask, <16 x float> %passthru) nounwind {
1571; CHECK-LABEL: test_broadcast_2f64_16f32_mask:
1572; CHECK:       # %bb.0:
1573; CHECK-NEXT:    kmovd %esi, %k1
1574; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1575; CHECK-NEXT:    retq
1576 %1 = load <2 x double>, <2 x double> *%p
1577 %2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1578 %3 = bitcast <8 x double> %2 to <16 x float>
1579 %mask.cast = bitcast i16 %mask to <16 x i1>
1580 %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> %passthru
1581 ret <16 x float> %res
1582}
1583
; Zero-masking case: loads a <2 x double>, repeats it four times to form
; <8 x double>, bitcasts to <16 x float>, and selects against zero with the
; 16-bit %mask. The CHECK lines expect a zero-masked vbroadcastf32x4 from memory
; into zmm0.
1584define <16 x float> @test_broadcast_2f64_16f32_maskz(<2 x double> *%p, i16 %mask) nounwind {
1585; CHECK-LABEL: test_broadcast_2f64_16f32_maskz:
1586; CHECK:       # %bb.0:
1587; CHECK-NEXT:    kmovd %esi, %k1
1588; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1589; CHECK-NEXT:    retq
1590 %1 = load <2 x double>, <2 x double> *%p
1591 %2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1592 %3 = bitcast <8 x double> %2 to <16 x float>
1593 %mask.cast = bitcast i16 %mask to <16 x i1>
1594 %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> zeroinitializer
1595 ret <16 x float> %res
1596}
1597
; Merge-masking case: loads a <2 x i64>, repeats it four times to form
; <8 x i64>, bitcasts to <16 x i32>, and selects against %passthru with the
; 16-bit %mask. The CHECK lines expect a masked vbroadcasti32x4 from memory
; into zmm0.
1598define <16 x i32> @test_broadcast_2i64_16i32_mask(<2 x i64> *%p, i16 %mask, <16 x i32> %passthru) nounwind {
1599; CHECK-LABEL: test_broadcast_2i64_16i32_mask:
1600; CHECK:       # %bb.0:
1601; CHECK-NEXT:    kmovd %esi, %k1
1602; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1603; CHECK-NEXT:    retq
1604 %1 = load <2 x i64>, <2 x i64> *%p
1605 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1606 %3 = bitcast <8 x i64> %2 to <16 x i32>
1607 %mask.cast = bitcast i16 %mask to <16 x i1>
1608 %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> %passthru
1609 ret <16 x i32> %res
1610}
1611
; Zero-masking case: loads a <2 x i64>, repeats it four times to form
; <8 x i64>, bitcasts to <16 x i32>, and selects against zero with the 16-bit
; %mask. The CHECK lines expect a zero-masked vbroadcasti32x4 from memory into
; zmm0.
1612define <16 x i32> @test_broadcast_2i64_16i32_maskz(<2 x i64> *%p, i16 %mask) nounwind {
1613; CHECK-LABEL: test_broadcast_2i64_16i32_maskz:
1614; CHECK:       # %bb.0:
1615; CHECK-NEXT:    kmovd %esi, %k1
1616; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1617; CHECK-NEXT:    retq
1618 %1 = load <2 x i64>, <2 x i64> *%p
1619 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1620 %3 = bitcast <8 x i64> %2 to <16 x i32>
1621 %mask.cast = bitcast i16 %mask to <16 x i1>
1622 %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> zeroinitializer
1623 ret <16 x i32> %res
1624}
1625
; Merge-masking case: loads a <4 x double>, repeats it twice to form
; <8 x double>, bitcasts to <16 x float>, and selects against %passthru with
; the 16-bit %mask. The CHECK lines expect a masked vbroadcastf32x8 from memory
; into zmm0.
1626define <16 x float> @test_broadcast_4f64_16f32_mask(<4 x double> *%p, i16 %mask, <16 x float> %passthru) nounwind {
1627; CHECK-LABEL: test_broadcast_4f64_16f32_mask:
1628; CHECK:       # %bb.0:
1629; CHECK-NEXT:    kmovd %esi, %k1
1630; CHECK-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1631; CHECK-NEXT:    retq
1632 %1 = load <4 x double>, <4 x double> *%p
1633 %2 = shufflevector <4 x double> %1, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1634 %3 = bitcast <8 x double> %2 to <16 x float>
1635 %mask.cast = bitcast i16 %mask to <16 x i1>
1636 %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> %passthru
1637 ret <16 x float> %res
1638}
1639
; Zero-masking case: loads a <4 x double>, repeats it twice to form
; <8 x double>, bitcasts to <16 x float>, and selects against zero with the
; 16-bit %mask. The CHECK lines expect a zero-masked vbroadcastf32x8 from memory
; into zmm0.
1640define <16 x float> @test_broadcast_4f64_16f32_maskz(<4 x double> *%p, i16 %mask) nounwind {
1641; CHECK-LABEL: test_broadcast_4f64_16f32_maskz:
1642; CHECK:       # %bb.0:
1643; CHECK-NEXT:    kmovd %esi, %k1
1644; CHECK-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1645; CHECK-NEXT:    retq
1646 %1 = load <4 x double>, <4 x double> *%p
1647 %2 = shufflevector <4 x double> %1, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1648 %3 = bitcast <8 x double> %2 to <16 x float>
1649 %mask.cast = bitcast i16 %mask to <16 x i1>
1650 %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> zeroinitializer
1651 ret <16 x float> %res
1652}
1653
; Merge-masking case: loads a <4 x i64>, repeats it twice to form <8 x i64>,
; bitcasts to <16 x i32>, and selects against %passthru with the 16-bit %mask.
; The CHECK lines expect a masked vbroadcasti32x8 from memory into zmm0.
1654define <16 x i32> @test_broadcast_4i64_16i32_mask(<4 x i64> *%p, i16 %mask, <16 x i32> %passthru) nounwind {
1655; CHECK-LABEL: test_broadcast_4i64_16i32_mask:
1656; CHECK:       # %bb.0:
1657; CHECK-NEXT:    kmovd %esi, %k1
1658; CHECK-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1659; CHECK-NEXT:    retq
1660 %1 = load <4 x i64>, <4 x i64> *%p
1661 %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1662 %3 = bitcast <8 x i64> %2 to <16 x i32>
1663 %mask.cast = bitcast i16 %mask to <16 x i1>
1664 %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> %passthru
1665 ret <16 x i32> %res
1666}
1667
; Zero-masking case: loads a <4 x i64>, repeats it twice to form <8 x i64>,
; bitcasts to <16 x i32>, and selects against zero with the 16-bit %mask.
; The CHECK lines expect a zero-masked vbroadcasti32x8 from memory into zmm0.
1668define <16 x i32> @test_broadcast_4i64_16i32_maskz(<4 x i64> *%p, i16 %mask) nounwind {
1669; CHECK-LABEL: test_broadcast_4i64_16i32_maskz:
1670; CHECK:       # %bb.0:
1671; CHECK-NEXT:    kmovd %esi, %k1
1672; CHECK-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1673; CHECK-NEXT:    retq
1674 %1 = load <4 x i64>, <4 x i64> *%p
1675 %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1676 %3 = bitcast <8 x i64> %2 to <16 x i32>
1677 %mask.cast = bitcast i16 %mask to <16 x i1>
1678 %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> zeroinitializer
1679 ret <16 x i32> %res
1680}
1681
; Merge-masking case: loads a <4 x float>, repeats it twice to form
; <8 x float>, bitcasts to <4 x double>, and selects against %passthru using
; the low 4 bits of %mask. The mask granularity is therefore 64-bit; the CHECK
; lines expect a masked vbroadcastf64x2 from memory into ymm0.
1682define <4 x double> @test_broadcast_4f32_4f64_mask(<4 x float> *%p, i8 %mask, <4 x double> %passthru) nounwind {
1683; CHECK-LABEL: test_broadcast_4f32_4f64_mask:
1684; CHECK:       # %bb.0:
1685; CHECK-NEXT:    kmovd %esi, %k1
1686; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
1687; CHECK-NEXT:    retq
1688 %1 = load <4 x float>, <4 x float> *%p
1689 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1690 %3 = bitcast <8 x float> %2 to <4 x double>
1691 %mask.cast = bitcast i8 %mask to <8 x i1>
1692 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1693 %res = select <4 x i1> %mask.extract, <4 x double> %3, <4 x double> %passthru
1694 ret <4 x double> %res
1695}
1696
; Zero-masking case: loads a <4 x float>, repeats it twice to form
; <8 x float>, bitcasts to <4 x double>, and selects against zero using the low
; 4 bits of %mask. The CHECK lines expect a zero-masked vbroadcastf64x2 from
; memory into ymm0.
1697define <4 x double> @test_broadcast_4f32_4f64_maskz(<4 x float> *%p, i8 %mask) nounwind {
1698; CHECK-LABEL: test_broadcast_4f32_4f64_maskz:
1699; CHECK:       # %bb.0:
1700; CHECK-NEXT:    kmovd %esi, %k1
1701; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
1702; CHECK-NEXT:    retq
1703 %1 = load <4 x float>, <4 x float> *%p
1704 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1705 %3 = bitcast <8 x float> %2 to <4 x double>
1706 %mask.cast = bitcast i8 %mask to <8 x i1>
1707 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1708 %res = select <4 x i1> %mask.extract, <4 x double> %3, <4 x double> zeroinitializer
1709 ret <4 x double> %res
1710}
1711
; Merge-masking case: loads a <4 x i32>, repeats it twice to form <8 x i32>,
; bitcasts to <4 x i64>, and selects against %passthru using the low 4 bits of
; %mask. The CHECK lines expect a masked vbroadcasti64x2 from memory into ymm0.
1712define <4 x i64> @test_broadcast_4i32_4i64_mask(<4 x i32> *%p, i8 %mask, <4 x i64> %passthru) nounwind {
1713; CHECK-LABEL: test_broadcast_4i32_4i64_mask:
1714; CHECK:       # %bb.0:
1715; CHECK-NEXT:    kmovd %esi, %k1
1716; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
1717; CHECK-NEXT:    retq
1718 %1 = load <4 x i32>, <4 x i32> *%p
1719 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1720 %3 = bitcast <8 x i32> %2 to <4 x i64>
1721 %mask.cast = bitcast i8 %mask to <8 x i1>
1722 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1723 %res = select <4 x i1> %mask.extract, <4 x i64> %3, <4 x i64> %passthru
1724 ret <4 x i64> %res
1725}
1726
; Zero-masking case: loads a <4 x i32>, repeats it twice to form <8 x i32>,
; bitcasts to <4 x i64>, and selects against zero using the low 4 bits of
; %mask. The CHECK lines expect a zero-masked vbroadcasti64x2 from memory into
; ymm0.
1727define <4 x i64> @test_broadcast_4i32_4i64_maskz(<4 x i32> *%p, i8 %mask) nounwind {
1728; CHECK-LABEL: test_broadcast_4i32_4i64_maskz:
1729; CHECK:       # %bb.0:
1730; CHECK-NEXT:    kmovd %esi, %k1
1731; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
1732; CHECK-NEXT:    retq
1733 %1 = load <4 x i32>, <4 x i32> *%p
1734 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1735 %3 = bitcast <8 x i32> %2 to <4 x i64>
1736 %mask.cast = bitcast i8 %mask to <8 x i1>
1737 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1738 %res = select <4 x i1> %mask.extract, <4 x i64> %3, <4 x i64> zeroinitializer
1739 ret <4 x i64> %res
1740}
1741
; Merge-masking case: loads a <4 x float>, repeats it four times to form
; <16 x float>, bitcasts to <8 x double>, and selects against %passthru with
; all 8 bits of %mask. The CHECK lines expect a masked vbroadcastf64x2 from
; memory into zmm0.
1742define <8 x double> @test_broadcast_4f32_8f64_mask(<4 x float> *%p, i8 %mask, <8 x double> %passthru) nounwind {
1743; CHECK-LABEL: test_broadcast_4f32_8f64_mask:
1744; CHECK:       # %bb.0:
1745; CHECK-NEXT:    kmovd %esi, %k1
1746; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
1747; CHECK-NEXT:    retq
1748 %1 = load <4 x float>, <4 x float> *%p
1749 %2 = shufflevector <4 x float> %1, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1750 %3 = bitcast <16 x float> %2 to <8 x double>
1751 %mask.cast = bitcast i8 %mask to <8 x i1>
1752 %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> %passthru
1753 ret <8 x double> %res
1754}
1755
; Zero-masking case: loads a <4 x float>, repeats it four times to form
; <16 x float>, bitcasts to <8 x double>, and selects against zero with all
; 8 bits of %mask. The CHECK lines expect a zero-masked vbroadcastf64x2 from
; memory into zmm0.
1756define <8 x double> @test_broadcast_4f32_8f64_maskz(<4 x float> *%p, i8 %mask) nounwind {
1757; CHECK-LABEL: test_broadcast_4f32_8f64_maskz:
1758; CHECK:       # %bb.0:
1759; CHECK-NEXT:    kmovd %esi, %k1
1760; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
1761; CHECK-NEXT:    retq
1762 %1 = load <4 x float>, <4 x float> *%p
1763 %2 = shufflevector <4 x float> %1, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1764 %3 = bitcast <16 x float> %2 to <8 x double>
1765 %mask.cast = bitcast i8 %mask to <8 x i1>
1766 %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> zeroinitializer
1767 ret <8 x double> %res
1768}
1769
; Merge-masking case: loads a <4 x i32>, repeats it four times to form
; <16 x i32>, bitcasts to <8 x i64>, and selects against %passthru with all
; 8 bits of %mask. The CHECK lines expect a masked vbroadcasti64x2 from memory
; into zmm0.
1770define <8 x i64> @test_broadcast_4i32_8i64_mask(<4 x i32> *%p, i8 %mask, <8 x i64> %passthru) nounwind {
1771; CHECK-LABEL: test_broadcast_4i32_8i64_mask:
1772; CHECK:       # %bb.0:
1773; CHECK-NEXT:    kmovd %esi, %k1
1774; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
1775; CHECK-NEXT:    retq
1776 %1 = load <4 x i32>, <4 x i32> *%p
1777 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1778 %3 = bitcast <16 x i32> %2 to <8 x i64>
1779 %mask.cast = bitcast i8 %mask to <8 x i1>
1780 %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> %passthru
1781 ret <8 x i64> %res
1782}
1783
; Zero-masking case: loads a <4 x i32>, repeats it four times to form
; <16 x i32>, bitcasts to <8 x i64>, and selects against zero with all 8 bits
; of %mask. The CHECK lines expect a zero-masked vbroadcasti64x2 from memory
; into zmm0.
1784define <8 x i64> @test_broadcast_4i32_8i64_maskz(<4 x i32> *%p, i8 %mask) nounwind {
1785; CHECK-LABEL: test_broadcast_4i32_8i64_maskz:
1786; CHECK:       # %bb.0:
1787; CHECK-NEXT:    kmovd %esi, %k1
1788; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
1789; CHECK-NEXT:    retq
1790 %1 = load <4 x i32>, <4 x i32> *%p
1791 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1792 %3 = bitcast <16 x i32> %2 to <8 x i64>
1793 %mask.cast = bitcast i8 %mask to <8 x i1>
1794 %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> zeroinitializer
1795 ret <8 x i64> %res
1796}
1797
; Merge-masking case: loads an <8 x float>, repeats it twice to form
; <16 x float>, bitcasts to <8 x double>, and selects against %passthru with
; all 8 bits of %mask. The CHECK lines expect a masked vbroadcastf64x4 from
; memory into zmm0.
1798define <8 x double> @test_broadcast_8f32_8f64_mask(<8 x float> *%p, i8 %mask, <8 x double> %passthru) nounwind {
1799; CHECK-LABEL: test_broadcast_8f32_8f64_mask:
1800; CHECK:       # %bb.0:
1801; CHECK-NEXT:    kmovd %esi, %k1
1802; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
1803; CHECK-NEXT:    retq
1804 %1 = load <8 x float>, <8 x float> *%p
1805 %2 = shufflevector <8 x float> %1, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1806 %3 = bitcast <16 x float> %2 to <8 x double>
1807 %mask.cast = bitcast i8 %mask to <8 x i1>
1808 %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> %passthru
1809 ret <8 x double> %res
1810}
1811
; Zero-masking case: loads an <8 x float>, repeats it twice to form
; <16 x float>, bitcasts to <8 x double>, and selects against zero with all
; 8 bits of %mask. The CHECK lines expect a zero-masked vbroadcastf64x4 from
; memory into zmm0.
1812define <8 x double> @test_broadcast_8f32_8f64_maskz(<8 x float> *%p, i8 %mask) nounwind {
1813; CHECK-LABEL: test_broadcast_8f32_8f64_maskz:
1814; CHECK:       # %bb.0:
1815; CHECK-NEXT:    kmovd %esi, %k1
1816; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
1817; CHECK-NEXT:    retq
1818 %1 = load <8 x float>, <8 x float> *%p
1819 %2 = shufflevector <8 x float> %1, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1820 %3 = bitcast <16 x float> %2 to <8 x double>
1821 %mask.cast = bitcast i8 %mask to <8 x i1>
1822 %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> zeroinitializer
1823 ret <8 x double> %res
1824}
1825
; Merge-masking case: loads an <8 x i32>, repeats it twice to form <16 x i32>,
; bitcasts to <8 x i64>, and selects against %passthru with all 8 bits of
; %mask. The CHECK lines expect a masked vbroadcasti64x4 from memory into zmm0.
1826define <8 x i64> @test_broadcast_8i32_8i64_mask(<8 x i32> *%p, i8 %mask, <8 x i64> %passthru) nounwind {
1827; CHECK-LABEL: test_broadcast_8i32_8i64_mask:
1828; CHECK:       # %bb.0:
1829; CHECK-NEXT:    kmovd %esi, %k1
1830; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
1831; CHECK-NEXT:    retq
1832 %1 = load <8 x i32>, <8 x i32> *%p
1833 %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1834 %3 = bitcast <16 x i32> %2 to <8 x i64>
1835 %mask.cast = bitcast i8 %mask to <8 x i1>
1836 %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> %passthru
1837 ret <8 x i64> %res
1838}
1839
; Zero-masking case: loads an <8 x i32>, repeats it twice to form <16 x i32>,
; bitcasts to <8 x i64>, and selects against zero with all 8 bits of %mask.
; The CHECK lines expect a zero-masked vbroadcasti64x4 from memory into zmm0.
1840define <8 x i64> @test_broadcast_8i32_8i64_maskz(<8 x i32> *%p, i8 %mask) nounwind {
1841; CHECK-LABEL: test_broadcast_8i32_8i64_maskz:
1842; CHECK:       # %bb.0:
1843; CHECK-NEXT:    kmovd %esi, %k1
1844; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
1845; CHECK-NEXT:    retq
1846 %1 = load <8 x i32>, <8 x i32> *%p
1847 %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1848 %3 = bitcast <16 x i32> %2 to <8 x i64>
1849 %mask.cast = bitcast i8 %mask to <8 x i1>
1850 %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> zeroinitializer
1851 ret <8 x i64> %res
1852}
1853
; Merge-masking case: repeats elements 0-1 of %vec across a <4 x float>
; (shuffle mask 0,1,0,1) and selects against %passthru using the low 4 bits of
; %mask. The CHECK lines expect two instructions here rather than one masked
; broadcast: an unmasked vmovddup followed by a masked vblendmps.
1854define <4 x float> @test_broadcastf32x2_v4f32(<4 x float> %vec, <4 x float> %passthru, i8 %mask) {
1855; CHECK-LABEL: test_broadcastf32x2_v4f32:
1856; CHECK:       # %bb.0:
1857; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1858; CHECK-NEXT:    kmovd %edi, %k1
1859; CHECK-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
1860; CHECK-NEXT:    retq
1861  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1862  %mask.cast = bitcast i8 %mask to <8 x i1>
1863  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1864  %res = select <4 x i1> %mask.extract, <4 x float> %shuf, <4 x float> %passthru
1865  ret <4 x float> %res
1866}
1867
; Zero-masking case: repeats elements 0-1 of %vec across a <4 x float>
; (shuffle mask 0,1,0,1) and selects against zero using the low 4 bits of
; %mask. The CHECK lines expect an unmasked vmovddup followed by a zero-masked
; vmovaps.
1868define <4 x float> @test_broadcastf32x2_v4f32_z(<4 x float> %vec, i8 %mask) {
1869; CHECK-LABEL: test_broadcastf32x2_v4f32_z:
1870; CHECK:       # %bb.0:
1871; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1872; CHECK-NEXT:    kmovd %edi, %k1
1873; CHECK-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
1874; CHECK-NEXT:    retq
1875  %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1876  %mask.cast = bitcast i8 %mask to <8 x i1>
1877  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1878  %res = select <4 x i1> %mask.extract, <4 x float> %shuf, <4 x float> zeroinitializer
1879  ret <4 x float> %res
1880}
1881
; Merge-masking case: repeats elements 0-1 of %vec across a <4 x i32>
; (shuffle mask 0,1,0,1) and selects against %passthru using the low 4 bits of
; %mask. The CHECK lines expect a masked vbroadcasti32x2 into the passthru
; register, followed by a copy to xmm0.
1882define <4 x i32> @test_broadcasti32x2_v4i32(<4 x i32> %vec, <4 x i32> %passthru, i8 %mask) {
1883; CHECK-LABEL: test_broadcasti32x2_v4i32:
1884; CHECK:       # %bb.0:
1885; CHECK-NEXT:    kmovd %edi, %k1
1886; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1]
1887; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
1888; CHECK-NEXT:    retq
1889  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1890  %mask.cast = bitcast i8 %mask to <8 x i1>
1891  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1892  %res = select <4 x i1> %mask.extract, <4 x i32> %shuf, <4 x i32> %passthru
1893  ret <4 x i32> %res
1894}
1895
; Zero-masking case: repeats elements 0-1 of %vec across a <4 x i32>
; (shuffle mask 0,1,0,1) and selects against zero using the low 4 bits of
; %mask. The CHECK lines expect a single zero-masked vbroadcasti32x2 on xmm0.
1896define <4 x i32> @test_broadcasti32x2_v4i32_z(<4 x i32> %vec, i8 %mask) {
1897; CHECK-LABEL: test_broadcasti32x2_v4i32_z:
1898; CHECK:       # %bb.0:
1899; CHECK-NEXT:    kmovd %edi, %k1
1900; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1]
1901; CHECK-NEXT:    retq
1902  %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1903  %mask.cast = bitcast i8 %mask to <8 x i1>
1904  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1905  %res = select <4 x i1> %mask.extract, <4 x i32> %shuf, <4 x i32> zeroinitializer
1906  ret <4 x i32> %res
1907}
1908
; Merge-masked <8 x float> shuffle that repeats elements 0,1 of %vec four
; times; lanes whose mask bit is clear take the corresponding %passthru lane.
; The expected assembly is a single masked vbroadcastf32x2 writing ymm1,
; followed by a register move of ymm1 into ymm0 (spelled vmovapd in the
; expected output).
1909define <8 x float> @test_broadcastf32x2_v8f32(<8 x float> %vec, <8 x float> %passthru, i8 %mask) {
1910; CHECK-LABEL: test_broadcastf32x2_v8f32:
1911; CHECK:       # %bb.0:
1912; CHECK-NEXT:    kmovd %edi, %k1
1913; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
1914; CHECK-NEXT:    vmovapd %ymm1, %ymm0
1915; CHECK-NEXT:    retq
1916  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; All eight bits of the i8 mask are used, one per result lane.
1917  %mask.cast = bitcast i8 %mask to <8 x i1>
1918  %res = select <8 x i1> %mask.cast, <8 x float> %shuf, <8 x float> %passthru
1919  ret <8 x float> %res
1920}
1921
; Zero-masked <8 x float> shuffle that repeats elements 0,1 of %vec four
; times; lanes whose mask bit is clear become zero.
; The expected assembly is a single zero-masked vbroadcastf32x2 reading xmm0
; and writing ymm0.
1922define <8 x float> @test_broadcastf32x2_v8f32_z(<8 x float> %vec, i8 %mask) {
1923; CHECK-LABEL: test_broadcastf32x2_v8f32_z:
1924; CHECK:       # %bb.0:
1925; CHECK-NEXT:    kmovd %edi, %k1
1926; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
1927; CHECK-NEXT:    retq
1928  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; All eight bits of the i8 mask are used, one per result lane.
1929  %mask.cast = bitcast i8 %mask to <8 x i1>
1930  %res = select <8 x i1> %mask.cast, <8 x float> %shuf, <8 x float> zeroinitializer
1931  ret <8 x float> %res
1932}
1933
; Merge-masked <8 x i32> shuffle that repeats elements 0,1 of %vec four
; times; lanes whose mask bit is clear take the corresponding %passthru lane.
; The expected assembly is a single masked vbroadcasti32x2 writing ymm1,
; followed by a register move of ymm1 into ymm0.
1934define <8 x i32> @test_broadcasti32x2_v8i32(<8 x i32> %vec, <8 x i32> %passthru, i8 %mask) {
1935; CHECK-LABEL: test_broadcasti32x2_v8i32:
1936; CHECK:       # %bb.0:
1937; CHECK-NEXT:    kmovd %edi, %k1
1938; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
1939; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
1940; CHECK-NEXT:    retq
1941  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; All eight bits of the i8 mask are used, one per result lane.
1942  %mask.cast = bitcast i8 %mask to <8 x i1>
1943  %res = select <8 x i1> %mask.cast, <8 x i32> %shuf, <8 x i32> %passthru
1944  ret <8 x i32> %res
1945}
1946
; Zero-masked <8 x i32> shuffle that repeats elements 0,1 of %vec four
; times; lanes whose mask bit is clear become zero.
; The expected assembly is a single zero-masked vbroadcasti32x2 reading xmm0
; and writing ymm0.
1947define <8 x i32> @test_broadcasti32x2_v8i32_z(<8 x i32> %vec, i8 %mask) {
1948; CHECK-LABEL: test_broadcasti32x2_v8i32_z:
1949; CHECK:       # %bb.0:
1950; CHECK-NEXT:    kmovd %edi, %k1
1951; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
1952; CHECK-NEXT:    retq
1953  %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; All eight bits of the i8 mask are used, one per result lane.
1954  %mask.cast = bitcast i8 %mask to <8 x i1>
1955  %res = select <8 x i1> %mask.cast, <8 x i32> %shuf, <8 x i32> zeroinitializer
1956  ret <8 x i32> %res
1957}
1958
; Zero-masked <16 x float> shuffle that repeats elements 0,1 of %vec eight
; times; lanes whose mask bit is clear become zero.
; The expected assembly is a single zero-masked vbroadcastf32x2 reading xmm0
; and writing zmm0.
1959define <16 x float> @test_broadcastf32x2_v16f32_z(<16 x float> %vec, i16 %mask) {
1960; CHECK-LABEL: test_broadcastf32x2_v16f32_z:
1961; CHECK:       # %bb.0:
1962; CHECK-NEXT:    kmovd %edi, %k1
1963; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1964; CHECK-NEXT:    retq
1965  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; All sixteen bits of the i16 mask are used, one per result lane.
1966  %mask.cast = bitcast i16 %mask to <16 x i1>
1967  %res = select <16 x i1> %mask.cast, <16 x float> %shuf, <16 x float> zeroinitializer
1968  ret <16 x float> %res
1969}
1970
; Merge-masked <16 x i32> shuffle that repeats elements 0,1 of %vec eight
; times; lanes whose mask bit is clear take the corresponding %passthru lane.
; The expected assembly is a single masked vbroadcasti32x2 writing zmm1,
; followed by a register move of zmm1 into zmm0.
1971define <16 x i32> @test_broadcasti32x2_v16i32(<16 x i32> %vec, <16 x i32> %passthru, i16 %mask) {
1972; CHECK-LABEL: test_broadcasti32x2_v16i32:
1973; CHECK:       # %bb.0:
1974; CHECK-NEXT:    kmovd %edi, %k1
1975; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1976; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
1977; CHECK-NEXT:    retq
1978  %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; All sixteen bits of the i16 mask are used, one per result lane.
1979  %mask.cast = bitcast i16 %mask to <16 x i1>
1980  %res = select <16 x i1> %mask.cast, <16 x i32> %shuf, <16 x i32> %passthru
1981  ret <16 x i32> %res
1982}
1983
; Merge-masked <16 x float> shuffle that repeats elements 0,1 of %vec eight
; times; lanes whose mask bit is clear take the corresponding %passthru lane.
; The expected assembly is a single masked vbroadcastf32x2 writing zmm1,
; followed by a register move of zmm1 into zmm0 (spelled vmovapd in the
; expected output).
1984define <16 x float> @test_broadcastf32x2_v16f32(<16 x float> %vec, <16 x float> %passthru, i16 %mask) {
1985; CHECK-LABEL: test_broadcastf32x2_v16f32:
1986; CHECK:       # %bb.0:
1987; CHECK-NEXT:    kmovd %edi, %k1
1988; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1989; CHECK-NEXT:    vmovapd %zmm1, %zmm0
1990; CHECK-NEXT:    retq
1991  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; All sixteen bits of the i16 mask are used, one per result lane.
1992  %mask.cast = bitcast i16 %mask to <16 x i1>
1993  %res = select <16 x i1> %mask.cast, <16 x float> %shuf, <16 x float> %passthru
1994  ret <16 x float> %res
1995}
1996
; Zero-masked <16 x i32> shuffle that repeats elements 0,1 of %vec eight
; times; lanes whose mask bit is clear become zero.
; The expected assembly is a single zero-masked vbroadcasti32x2 reading xmm0
; and writing zmm0.
1997define <16 x i32> @test_broadcasti32x2_v16i32_z(<16 x i32> %vec, i16 %mask) {
1998; CHECK-LABEL: test_broadcasti32x2_v16i32_z:
1999; CHECK:       # %bb.0:
2000; CHECK-NEXT:    kmovd %edi, %k1
2001; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2002; CHECK-NEXT:    retq
2003  %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; All sixteen bits of the i16 mask are used, one per result lane.
2004  %mask.cast = bitcast i16 %mask to <16 x i1>
2005  %res = select <16 x i1> %mask.cast, <16 x i32> %shuf, <16 x i32> zeroinitializer
2006  ret <16 x i32> %res
2007}
2008
; Merge-masked <16 x i8> two-input shuffle with consecutive indices 1..16:
; bytes 1-15 of %a followed by byte 0 of %b. Lanes whose mask bit is clear
; take the corresponding %passthru byte.
; The expected assembly is a single masked vpalignr writing xmm2, followed by
; a register move of xmm2 into xmm0.
2009define <16 x i8> @mask_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passthru, i16 %mask) {
2010; CHECK-LABEL: mask_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16:
2011; CHECK:       # %bb.0:
2012; CHECK-NEXT:    kmovd %edi, %k1
2013; CHECK-NEXT:    vpalignr {{.*#+}} xmm2 {%k1} = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
2014; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
2015; CHECK-NEXT:    retq
2016  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
; All sixteen bits of the i16 mask are used, one per result byte.
2017  %mask.cast = bitcast i16 %mask to <16 x i1>
2018  %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> %passthru
2019  ret <16 x i8> %res
2020}
2021
; Zero-masked <16 x i8> two-input shuffle with consecutive indices 1..16:
; bytes 1-15 of %a followed by byte 0 of %b. Lanes whose mask bit is clear
; become zero.
; The expected assembly is a single zero-masked vpalignr writing xmm0 in place.
2022define <16 x i8> @maskz_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
2023; CHECK-LABEL: maskz_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16:
2024; CHECK:       # %bb.0:
2025; CHECK-NEXT:    kmovd %edi, %k1
2026; CHECK-NEXT:    vpalignr {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
2027; CHECK-NEXT:    retq
2028  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
; All sixteen bits of the i16 mask are used, one per result byte.
2029  %mask.cast = bitcast i16 %mask to <16 x i1>
2030  %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> zeroinitializer
2031  ret <16 x i8> %res
2032}
2033
; Merge-masked <16 x i8> two-input shuffle with consecutive indices 4..19:
; bytes 4-15 of %a followed by bytes 0-3 of %b. Lanes whose mask bit is clear
; take the corresponding %passthru byte.
; The expected assembly is a single masked vpalignr writing xmm2, followed by
; a register move of xmm2 into xmm0.
2034define <16 x i8> @mask_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passthru, i16 %mask) {
2035; CHECK-LABEL: mask_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19:
2036; CHECK:       # %bb.0:
2037; CHECK-NEXT:    kmovd %edi, %k1
2038; CHECK-NEXT:    vpalignr {{.*#+}} xmm2 {%k1} = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
2039; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
2040; CHECK-NEXT:    retq
2041  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
; All sixteen bits of the i16 mask are used, one per result byte.
2042  %mask.cast = bitcast i16 %mask to <16 x i1>
2043  %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> %passthru
2044  ret <16 x i8> %res
2045}
2046
; Zero-masked <16 x i8> two-input shuffle with consecutive indices 4..19:
; bytes 4-15 of %a followed by bytes 0-3 of %b. Lanes whose mask bit is clear
; become zero.
; The expected assembly is a single zero-masked vpalignr writing xmm0 in place.
2047define <16 x i8> @maskz_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
2048; CHECK-LABEL: maskz_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19:
2049; CHECK:       # %bb.0:
2050; CHECK-NEXT:    kmovd %edi, %k1
2051; CHECK-NEXT:    vpalignr {{.*#+}} xmm0 {%k1} {z} = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
2052; CHECK-NEXT:    retq
2053  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
; All sixteen bits of the i16 mask are used, one per result byte.
2054  %mask.cast = bitcast i16 %mask to <16 x i1>
2055  %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> zeroinitializer
2056  ret <16 x i8> %res
2057}
2058
; Merge-masked <16 x i8> two-input shuffle with consecutive indices 8..23:
; bytes 8-15 of %a followed by bytes 0-7 of %b. Lanes whose mask bit is clear
; take the corresponding %passthru byte.
; The expected assembly is a single masked vpalignr writing xmm2, followed by
; a register move of xmm2 into xmm0.
2059define <16 x i8> @mask_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passthru, i16 %mask) {
2060; CHECK-LABEL: mask_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23:
2061; CHECK:       # %bb.0:
2062; CHECK-NEXT:    kmovd %edi, %k1
2063; CHECK-NEXT:    vpalignr {{.*#+}} xmm2 {%k1} = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
2064; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
2065; CHECK-NEXT:    retq
2066  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; All sixteen bits of the i16 mask are used, one per result byte.
2067  %mask.cast = bitcast i16 %mask to <16 x i1>
2068  %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> %passthru
2069  ret <16 x i8> %res
2070}
2071
; Zero-masked <16 x i8> two-input shuffle with consecutive indices 8..23:
; bytes 8-15 of %a followed by bytes 0-7 of %b. Lanes whose mask bit is clear
; become zero.
; The expected assembly is a single zero-masked vpalignr writing xmm0 in place.
2072define <16 x i8> @maskz_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
2073; CHECK-LABEL: maskz_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23:
2074; CHECK:       # %bb.0:
2075; CHECK-NEXT:    kmovd %edi, %k1
2076; CHECK-NEXT:    vpalignr {{.*#+}} xmm0 {%k1} {z} = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
2077; CHECK-NEXT:    retq
2078  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; All sixteen bits of the i16 mask are used, one per result byte.
2079  %mask.cast = bitcast i16 %mask to <16 x i1>
2080  %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> zeroinitializer
2081  ret <16 x i8> %res
2082}
2083