; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512f-builtins.c
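; For illustration only (a hedged sketch, not produced by the update script): each
; test below is meant to mirror the IR clang emits for the corresponding AVX-512F
; intrinsic, e.g. test_mm512_broadcastd_epi32 corresponds to C source such as
;   __m512i broadcastd_example(__m128i a) { return _mm512_broadcastd_epi32(a); }
; where the wrapper name "broadcastd_example" is made up here; see the clang test
; referenced above for the authoritative versions.
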
define <8 x i64> @test_mm512_broadcastd_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm512_broadcastd_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastd %xmm0, %zmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_broadcastd_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastd %xmm0, %zmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <16 x i32> zeroinitializer
  %res1 = bitcast <16 x i32> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_broadcastd_epi32(<8 x i64> %a0, i16 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm512_mask_broadcastd_epi32:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastd %xmm1, %zmm0 {%k1}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_broadcastd_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpbroadcastd %xmm1, %zmm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res0 = shufflevector <4 x i32> %arg2, <4 x i32> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
  %res2 = bitcast <16 x i32> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_broadcastd_epi32(i16 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm512_maskz_broadcastd_epi32:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastd %xmm0, %zmm0 {%k1} {z}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_broadcastd_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpbroadcastd %xmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res0 = shufflevector <4 x i32> %arg1, <4 x i32> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
  %res2 = bitcast <16 x i32> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_broadcastq_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm512_broadcastq_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastq %xmm0, %zmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_broadcastq_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastq %xmm0, %zmm0
; X64-NEXT:    retq
  %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <8 x i32> zeroinitializer
  ret <8 x i64> %res
}

define <8 x i64> @test_mm512_mask_broadcastq_epi64(<8 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm512_mask_broadcastq_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastq %xmm1, %zmm0 {%k1}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_broadcastq_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpbroadcastq %xmm1, %zmm0 {%k1}
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <2 x i64> %a2, <2 x i64> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm512_maskz_broadcastq_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastq %xmm0, %zmm0 {%k1} {z}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_broadcastq_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpbroadcastq %xmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <2 x i64> %a1, <2 x i64> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
  ret <8 x i64> %res1
}

define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a0) {
; X32-LABEL: test_mm512_broadcastsd_pd:
; X32:       # BB#0:
; X32-NEXT:    vbroadcastsd %xmm0, %zmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_broadcastsd_pd:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %zmm0
; X64-NEXT:    retq
  %res = shufflevector <2 x double> %a0, <2 x double> undef, <8 x i32> zeroinitializer
  ret <8 x double> %res
}

define <8 x double> @test_mm512_mask_broadcastsd_pd(<8 x double> %a0, i8 %a1, <2 x double> %a2) {
; X32-LABEL: test_mm512_mask_broadcastsd_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vbroadcastsd %xmm1, %zmm0 {%k1}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_broadcastsd_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vbroadcastsd %xmm1, %zmm0 {%k1}
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <2 x double> %a2, <2 x double> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
  ret <8 x double> %res1
}

define <8 x double> @test_mm512_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
; X32-LABEL: test_mm512_maskz_broadcastsd_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_broadcastsd_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <2 x double> %a1, <2 x double> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
  ret <8 x double> %res1
}

define <16 x float> @test_mm512_broadcastss_ps(<4 x float> %a0) {
; X32-LABEL: test_mm512_broadcastss_ps:
; X32:       # BB#0:
; X32-NEXT:    vbroadcastss %xmm0, %zmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_broadcastss_ps:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %zmm0
; X64-NEXT:    retq
  %res = shufflevector <4 x float> %a0, <4 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_broadcastss_ps(<16 x float> %a0, i16 %a1, <4 x float> %a2) {
; X32-LABEL: test_mm512_mask_broadcastss_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vbroadcastss %xmm1, %zmm0 {%k1}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_broadcastss_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vbroadcastss %xmm1, %zmm0 {%k1}
; X64-NEXT:    retq
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %res0 = shufflevector <4 x float> %a2, <4 x float> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
  ret <16 x float> %res1
}

define <16 x float> @test_mm512_maskz_broadcastss_ps(i16 %a0, <4 x float> %a1) {
; X32-LABEL: test_mm512_maskz_broadcastss_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_broadcastss_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %res0 = shufflevector <4 x float> %a1, <4 x float> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
  ret <16 x float> %res1
}

define <8 x double> @test_mm512_movddup_pd(<8 x double> %a0) {
; X32-LABEL: test_mm512_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    retq
  %res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x double> %res
}

define <8 x double> @test_mm512_mask_movddup_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_mask_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
  ret <8 x double> %res1
}

define <8 x double> @test_mm512_maskz_movddup_pd(i8 %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_maskz_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
  ret <8 x double> %res1
}

define <16 x float> @test_mm512_movehdup_ps(<16 x float> %a0) {
; X32-LABEL: test_mm512_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    retq
  %res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_movehdup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
; X32-LABEL: test_mm512_mask_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    retq
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
  ret <16 x float> %res1
}

define <16 x float> @test_mm512_maskz_movehdup_ps(i16 %a0, <16 x float> %a1) {
; X32-LABEL: test_mm512_maskz_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
  ret <16 x float> %res1
}

define <16 x float> @test_mm512_moveldup_ps(<16 x float> %a0) {
; X32-LABEL: test_mm512_moveldup_ps:
; X32:       # BB#0:
; X32-NEXT:    vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_moveldup_ps:
; X64:       # BB#0:
; X64-NEXT:    vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    retq
  %res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_moveldup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
; X32-LABEL: test_mm512_mask_moveldup_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_moveldup_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    retq
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
  ret <16 x float> %res1
}

define <16 x float> @test_mm512_maskz_moveldup_ps(i16 %a0, <16 x float> %a1) {
; X32-LABEL: test_mm512_maskz_moveldup_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_moveldup_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
  ret <16 x float> %res1
}

define <8 x double> @test_mm512_permute_pd(<8 x double> %a0) {
; X32-LABEL: test_mm512_permute_pd:
; X32:       # BB#0:
; X32-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,2,4,4,6,6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_permute_pd:
; X64:       # BB#0:
; X64-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,2,4,4,6,6]
; X64-NEXT:    retq
  %res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x double> %res
}

define <8 x double> @test_mm512_mask_permute_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_mask_permute_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermilpd {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,2,4,4,6,6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_permute_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermilpd {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,2,4,4,6,6]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
  ret <8 x double> %res1
}

define <8 x double> @test_mm512_maskz_permute_pd(i8 %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_maskz_permute_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_permute_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,6]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
  ret <8 x double> %res1
}

define <16 x float> @test_mm512_permute_ps(<16 x float> %a0) {
; X32-LABEL: test_mm512_permute_ps:
; X32:       # BB#0:
; X32-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_permute_ps:
; X64:       # BB#0:
; X64-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X64-NEXT:    retq
  %res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_permute_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
; X32-LABEL: test_mm512_mask_permute_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_permute_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X64-NEXT:    retq
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
  %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
  ret <16 x float> %res1
}

define <16 x float> @test_mm512_maskz_permute_ps(i16 %a0, <16 x float> %a1) {
; X32-LABEL: test_mm512_maskz_permute_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_permute_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
  %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
  ret <16 x float> %res1
}

define <8 x i64> @test_mm512_permutex_epi64(<8 x i64> %a0) {
; X32-LABEL: test_mm512_permutex_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_permutex_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %res = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i64> %res
}

define <8 x i64> @test_mm512_mask_permutex_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_mask_permutex_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_permutex_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x i64> %a2, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_maskz_permutex_epi64(i8 %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_maskz_permutex_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_permutex_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x i64> %a1, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
  ret <8 x i64> %res1
}

define <8 x double> @test_mm512_permutex_pd(<8 x double> %a0) {
; X32-LABEL: test_mm512_permutex_pd:
; X32:       # BB#0:
; X32-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_permutex_pd:
; X64:       # BB#0:
; X64-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x double> %res
}

define <8 x double> @test_mm512_mask_permutex_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_mask_permutex_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_permutex_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
  ret <8 x double> %res1
}

define <8 x double> @test_mm512_maskz_permutex_pd(i8 %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_maskz_permutex_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_permutex_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
  ret <8 x double> %res1
}

define <8 x i64> @test_mm512_shuffle_epi32(<8 x i64> %a0) {
; X32-LABEL: test_mm512_shuffle_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpshufd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_shuffle_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpshufd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
  %res0 = shufflevector <16 x i32> %arg0, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
  %res1 = bitcast <16 x i32> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_shuffle_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_mask_shuffle_epi32:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = zmm1[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shuffle_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = zmm1[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
  %res0 = shufflevector <16 x i32> %arg2, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
  %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
  %res2 = bitcast <16 x i32> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_shuffle_epi32(i16 %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_maskz_shuffle_epi32:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shuffle_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
  %res0 = shufflevector <16 x i32> %arg1, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
  %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
  %res2 = bitcast <16 x i32> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x double> @test_mm512_shuffle_pd(<8 x double> %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_shuffle_pd:
; X32:       # BB#0:
; X32-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_shuffle_pd:
; X64:       # BB#0:
; X64-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT:    retq
  %res = shufflevector <8 x double> %a0, <8 x double> %a1, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x double> %res
}

define <8 x double> @test_mm512_mask_shuffle_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
; X32-LABEL: test_mm512_mask_shuffle_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[3],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shuffle_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[3],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
  %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
  ret <8 x double> %res1
}

define <8 x double> @test_mm512_maskz_shuffle_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_maskz_shuffle_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shuffle_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
  %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
  ret <8 x double> %res1
}

define <8 x i64> @test_mm512_unpackhi_epi32(<8 x i64> %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_unpackhi_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_unpackhi_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
  %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
  %res0 = shufflevector <16 x i32> %arg0, <16 x i32> %arg1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  %res1 = bitcast <16 x i32> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_unpackhi_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X32-LABEL: test_mm512_mask_unpackhi_epi32:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpackhi_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
  %arg3 = bitcast <8 x i64> %a3 to <16 x i32>
  %res0 = shufflevector <16 x i32> %arg2, <16 x i32> %arg3, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
  %res2 = bitcast <16 x i32> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_unpackhi_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_maskz_unpackhi_epi32:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpackhi_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
  %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
  %res0 = shufflevector <16 x i32> %arg1, <16 x i32> %arg2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
  %res2 = bitcast <16 x i32> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_unpackhi_epi64(<8 x i64> %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_unpackhi_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_unpackhi_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT:    retq
  %res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x i64> %res
}

define <8 x i64> @test_mm512_mask_unpackhi_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X32-LABEL: test_mm512_mask_unpackhi_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpackhi_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x i64> %a2, <8 x i64> %a3, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_maskz_unpackhi_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_maskz_unpackhi_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpackhi_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x i64> %a1, <8 x i64> %a2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
  ret <8 x i64> %res1
}

define <8 x double> @test_mm512_unpackhi_pd(<8 x double> %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_unpackhi_pd:
; X32:       # BB#0:
; X32-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_unpackhi_pd:
; X64:       # BB#0:
; X64-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT:    retq
  %res = shufflevector <8 x double> %a0, <8 x double> %a1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x double> %res
}

define <8 x double> @test_mm512_mask_unpackhi_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
; X32-LABEL: test_mm512_mask_unpackhi_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpackhi_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
  ret <8 x double> %res1
}

define <8 x double> @test_mm512_maskz_unpackhi_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_maskz_unpackhi_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpackhi_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
  ret <8 x double> %res1
}

define <16 x float> @test_mm512_unpackhi_ps(<16 x float> %a0, <16 x float> %a1) {
; X32-LABEL: test_mm512_unpackhi_ps:
; X32:       # BB#0:
; X32-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_unpackhi_ps:
; X64:       # BB#0:
; X64-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT:    retq
  %res = shufflevector <16 x float> %a0, <16 x float> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_unpackhi_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
; X32-LABEL: test_mm512_mask_unpackhi_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpackhi_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
; X64-NEXT:    retq
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %res0 = shufflevector <16 x float> %a2, <16 x float> %a3, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
  ret <16 x float> %res1
}

define <16 x float> @test_mm512_maskz_unpackhi_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
; X32-LABEL: test_mm512_maskz_unpackhi_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpackhi_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %res0 = shufflevector <16 x float> %a1, <16 x float> %a2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
  ret <16 x float> %res1
}

define <8 x i64> @test_mm512_unpacklo_epi32(<8 x i64> %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_unpacklo_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_unpacklo_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
  %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
  %res0 = shufflevector <16 x i32> %arg0, <16 x i32> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  %res1 = bitcast <16 x i32> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_unpacklo_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X32-LABEL: test_mm512_mask_unpacklo_epi32:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpacklo_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
  %arg3 = bitcast <8 x i64> %a3 to <16 x i32>
  %res0 = shufflevector <16 x i32> %arg2, <16 x i32> %arg3, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
  %res2 = bitcast <16 x i32> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_unpacklo_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_maskz_unpacklo_epi32:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpacklo_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
  %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
  %res0 = shufflevector <16 x i32> %arg1, <16 x i32> %arg2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
  %res2 = bitcast <16 x i32> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_unpacklo_epi64(<8 x i64> %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_unpacklo_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_unpacklo_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT:    retq
  %res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x i64> %res
}

define <8 x i64> @test_mm512_mask_unpacklo_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X32-LABEL: test_mm512_mask_unpacklo_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpacklo_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x i64> %a2, <8 x i64> %a3, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_maskz_unpacklo_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_maskz_unpacklo_epi64:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpacklo_epi64:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x i64> %a1, <8 x i64> %a2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
  ret <8 x i64> %res1
}

define <8 x double> @test_mm512_unpacklo_pd(<8 x double> %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_unpacklo_pd:
; X32:       # BB#0:
; X32-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_unpacklo_pd:
; X64:       # BB#0:
; X64-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT:    retq
  %res = shufflevector <8 x double> %a0, <8 x double> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x double> %res
}

define <8 x double> @test_mm512_mask_unpacklo_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
; X32-LABEL: test_mm512_mask_unpacklo_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpacklo_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X64-NEXT:    retq
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
  ret <8 x double> %res1
}

define <8 x double> @test_mm512_maskz_unpacklo_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_maskz_unpacklo_pd:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpacklo_pd:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
  ret <8 x double> %res1
}

define <16 x float> @test_mm512_unpacklo_ps(<16 x float> %a0, <16 x float> %a1) {
; X32-LABEL: test_mm512_unpacklo_ps:
; X32:       # BB#0:
; X32-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_unpacklo_ps:
; X64:       # BB#0:
; X64-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT:    retq
  %res = shufflevector <16 x float> %a0, <16 x float> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_unpacklo_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
; X32-LABEL: test_mm512_mask_unpacklo_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpacklo_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
; X64-NEXT:    retq
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %res0 = shufflevector <16 x float> %a2, <16 x float> %a3, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
  ret <16 x float> %res1
}

define <16 x float> @test_mm512_maskz_unpacklo_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
; X32-LABEL: test_mm512_maskz_unpacklo_ps:
; X32:       # BB#0:
; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpacklo_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %res0 = shufflevector <16 x float> %a1, <16 x float> %a2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
  ret <16 x float> %res1
}

!0 = !{i32 1}