; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=skx | FileCheck %s --check-prefix=SKX64
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=knl | FileCheck %s --check-prefix=KNL64
; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=skx | FileCheck %s --check-prefix=SKX32
; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=knl | FileCheck %s --check-prefix=KNL32

; Expand 128 -> 256; includes <4 x float> and <2 x double> cases.
; Expand <4 x float> %a into <8 x float>: elements 0,1 of %a go to lanes 0,2; all other lanes are zero (mask 0b00000101).
define <8 x float> @expand(<4 x float> %a) {
; SKX64-LABEL: expand:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX64-NEXT:    movb $5, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4,5,6,7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX32-NEXT:    movb $5, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand:
; KNL32:       # %bb.0:
; KNL32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4,5,6,7]
; KNL32-NEXT:    retl
   %res = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 5, i32 1, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <8 x float> %res
}
; Expand %a into the odd lanes of an <8 x float>; even lanes are zero (mask 0b10101010).
define <8 x float> @expand1(<4 x float> %a ) {
; SKX64-LABEL: expand1:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX64-NEXT:    movb $-86, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand1:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL64-NEXT:    vmovaps {{.*#+}} ymm1 = <u,0,u,1,u,2,u,3>
; KNL64-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand1:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX32-NEXT:    movb $-86, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand1:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL32-NEXT:    vmovaps {{.*#+}} ymm1 = <u,0,u,1,u,2,u,3>
; KNL32-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; KNL32-NEXT:    retl
   %res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
   ret <8 x float> %res
}

; Expand 128 -> 256 test: <2 x double> -> <4 x double>.
; Expand <2 x double> %a into lanes 0 and 3 of a <4 x double>; middle lanes are zero (mask 0b1001).
define <4 x double> @expand2(<2 x double> %a) {
; SKX64-LABEL: expand2:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX64-NEXT:    movb $9, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vexpandpd %ymm0, %ymm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand2:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand2:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX32-NEXT:    movb $9, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vexpandpd %ymm0, %ymm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand2:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL32-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; KNL32-NEXT:    retl
   %res = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 2, i32 1>
   ret <4 x double> %res
}

; Expand 128 -> 256; includes case <4 x i32> -> <8 x i32>.
; Expand <4 x i32> %a into lanes 0 and 7 of a <8 x i32>; other lanes are zero (mask 0b10000001).
define <8 x i32> @expand3(<4 x i32> %a ) {
; SKX64-LABEL: expand3:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX64-NEXT:    movb $-127, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpexpandd %ymm0, %ymm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand3:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vbroadcastsd %xmm0, %ymm0
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6],ymm0[7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand3:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX32-NEXT:    movb $-127, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpexpandd %ymm0, %ymm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand3:
; KNL32:       # %bb.0:
; KNL32-NEXT:    vbroadcastsd %xmm0, %ymm0
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6],ymm0[7]
; KNL32-NEXT:    retl
   %res = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <8 x i32> <i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,i32 5>
   ret <8 x i32> %res
}

; Expand 128 -> 256; includes case <2 x i64> -> <4 x i64>.
; Expand <2 x i64> %a into lanes 0 and 3 of a <4 x i64>; middle lanes are zero (mask 0b1001).
define <4 x i64> @expand4(<2 x i64> %a ) {
; SKX64-LABEL: expand4:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX64-NEXT:    movb $9, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpexpandq %ymm0, %ymm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand4:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand4:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX32-NEXT:    movb $9, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpexpandq %ymm0, %ymm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand4:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL32-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; KNL32-NEXT:    retl
   %res = shufflevector <2 x i64> zeroinitializer, <2 x i64> %a, <4 x i32> <i32 2, i32 0, i32 0, i32 3>
   ret <4 x i64> %res
}

; Negative test for 128 -> 256.
; Not an expand pattern: element 0 of %a repeats in every odd lane, so a broadcast+blend is expected instead.
define <8 x float> @expand5(<4 x float> %a ) {
; SKX64-LABEL: expand5:
; SKX64:       # %bb.0:
; SKX64-NEXT:    vbroadcastss %xmm0, %ymm0
; SKX64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand5:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vbroadcastss %xmm0, %ymm0
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand5:
; SKX32:       # %bb.0:
; SKX32-NEXT:    vbroadcastss %xmm0, %ymm0
; SKX32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand5:
; KNL32:       # %bb.0:
; KNL32-NEXT:    vbroadcastss %xmm0, %ymm0
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; KNL32-NEXT:    retl
   %res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
   ret <8 x float> %res
}

; Expand 256 -> 512; includes <8 x float> -> <16 x float>.
; %a forms the upper half of the result, zeros the lower half: folds to a single insert.
define <8 x float> @expand6(<4 x float> %a ) {
; SKX64-LABEL: expand6:
; SKX64:       # %bb.0:
; SKX64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand6:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand6:
; SKX32:       # %bb.0:
; SKX32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX32-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand6:
; KNL32:       # %bb.0:
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; KNL32-NEXT:    retl
   %res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x float> %res
}
; Expand <8 x float> %a into a <16 x float> at lanes {0,2,8,10}; other lanes zero (mask 0x0505).
define <16 x float> @expand7(<8 x float> %a) {
; SKX64-LABEL: expand7:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX64-NEXT:    movw $1285, %ax # imm = 0x505
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand7:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL64-NEXT:    movw $1285, %ax # imm = 0x505
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand7:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX32-NEXT:    movw $1285, %ax # imm = 0x505
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand7:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL32-NEXT:    movw $1285, %ax # imm = 0x505
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; KNL32-NEXT:    retl
   %res = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 8, i32 8, i32 8, i32 8, i32 2, i32 8, i32 3, i32 8, i32 8, i32 8, i32 8, i32 8>
   ret <16 x float> %res
}
; Expand %a into the odd lanes of a <16 x float>; even lanes zero (mask 0xAAAA).
define <16 x float> @expand8(<8 x float> %a ) {
; SKX64-LABEL: expand8:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand8:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand8:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand8:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; KNL32-NEXT:    retl
   %res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   ret <16 x float> %res
}

; Expand 256 -> 512; includes <4 x double> -> <8 x double>.
; Expand <4 x double> %a into lanes 0 and 7 of a <8 x double>; other lanes zero (mask 0b10000001).
define <8 x double> @expand9(<4 x double> %a) {
; SKX64-LABEL: expand9:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX64-NEXT:    movb $-127, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vexpandpd %zmm0, %zmm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand9:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL64-NEXT:    movb $-127, %al
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vexpandpd %zmm0, %zmm0 {%k1} {z}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand9:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX32-NEXT:    movb $-127, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vexpandpd %zmm0, %zmm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand9:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL32-NEXT:    movb $-127, %al
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vexpandpd %zmm0, %zmm0 {%k1} {z}
; KNL32-NEXT:    retl
   %res = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 1>
   ret <8 x double> %res
}
; Expand <8 x i32> %a into the odd lanes of a <16 x i32>; even lanes zero (mask 0xAAAA).
define <16 x i32> @expand10(<8 x i32> %a ) {
; SKX64-LABEL: expand10:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand10:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand10:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand10:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
; KNL32-NEXT:    retl
   %res = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   ret <16 x i32> %res
}
; Expand <4 x i64> %a into lanes 0 and 7 of a <8 x i64>; other lanes zero (mask 0b10000001).
define <8 x i64> @expand11(<4 x i64> %a) {
; SKX64-LABEL: expand11:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX64-NEXT:    movb $-127, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand11:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL64-NEXT:    movb $-127, %al
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand11:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX32-NEXT:    movb $-127, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand11:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL32-NEXT:    movb $-127, %al
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
; KNL32-NEXT:    retl
   %res = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 1>
   ret <8 x i64> %res
}

; Negative test for 256 -> 512.
; Not an expand pattern: source elements repeat across halves, so a full two-source permute is expected.
define <16 x float> @expand12(<8 x float> %a) {
; SKX64-LABEL: expand12:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX64-NEXT:    vmovaps {{.*#+}} zmm2 = [0,16,2,16,4,16,6,16,0,16,1,16,2,16,3,16]
; SKX64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX64-NEXT:    vpermt2ps %zmm0, %zmm2, %zmm1
; SKX64-NEXT:    vmovaps %zmm1, %zmm0
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand12:
; KNL64:       # %bb.0:
; KNL64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL64-NEXT:    vmovaps {{.*#+}} zmm2 = [0,16,2,16,4,16,6,16,0,16,1,16,2,16,3,16]
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vpermt2ps %zmm0, %zmm2, %zmm1
; KNL64-NEXT:    vmovaps %zmm1, %zmm0
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand12:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX32-NEXT:    vmovaps {{.*#+}} zmm2 = [0,16,2,16,4,16,6,16,0,16,1,16,2,16,3,16]
; SKX32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX32-NEXT:    vpermt2ps %zmm0, %zmm2, %zmm1
; SKX32-NEXT:    vmovaps %zmm1, %zmm0
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand12:
; KNL32:       # %bb.0:
; KNL32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL32-NEXT:    vmovaps {{.*#+}} zmm2 = [0,16,2,16,4,16,6,16,0,16,1,16,2,16,3,16]
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vpermt2ps %zmm0, %zmm2, %zmm1
; KNL32-NEXT:    vmovaps %zmm1, %zmm0
; KNL32-NEXT:    retl
   %res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8,i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
   ret <16 x float> %res
}
; %a forms the upper half of the result, zeros the lower half: folds to a single 256-bit insert.
define <16 x float> @expand13(<8 x float> %a ) {
; SKX64-LABEL: expand13:
; SKX64:       # %bb.0:
; SKX64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand13:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand13:
; SKX32:       # %bb.0:
; SKX32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX32-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand13:
; KNL32:       # %bb.0:
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; KNL32-NEXT:    retl
   %res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   ret <16 x float> %res
}

; Checks the case where the first source is a mixed-value vector and the mask
; selects only its known-zero elements, so the shuffle still folds to an expand.
; %addV contributes only zero elements (lanes 3 and 0 of <0,2,4,0>... are 0.0), so this is an expand of %a with mask 0b00010100.
define <8 x float> @expand14(<4 x float> %a) {
; SKX64-LABEL: expand14:
; SKX64:       # %bb.0:
; SKX64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX64-NEXT:    movb $20, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand14:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
; KNL64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand14:
; SKX32:       # %bb.0:
; SKX32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX32-NEXT:    movb $20, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand14:
; KNL32:       # %bb.0:
; KNL32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL32-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
; KNL32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL32-NEXT:    retl
   %addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0>
   %res = shufflevector <4 x float> %addV, <4 x float> %a, <8 x i32> <i32 3, i32 3, i32 4, i32 0, i32 5, i32 0, i32 0, i32 0>
   ret <8 x float> %res
}

; Negative test: the mask also selects non-zero elements of %addV.
; Not an expand: lanes 2 (value 4.0) of %addV are selected, so a two-source permute/blend is expected.
define <8 x float> @expand15(<4 x float> %a) {
; SKX64-LABEL: expand15:
; SKX64:       # %bb.0:
; SKX64-NEXT:    vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
; SKX64-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
; SKX64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
; SKX64-NEXT:    vpermi2ps %ymm2, %ymm1, %ymm0
; SKX64-NEXT:    retq
;
; KNL64-LABEL: expand15:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
; KNL64-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: expand15:
; SKX32:       # %bb.0:
; SKX32-NEXT:    vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
; SKX32-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
; SKX32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
; SKX32-NEXT:    vpermi2ps %ymm2, %ymm1, %ymm0
; SKX32-NEXT:    retl
;
; KNL32-LABEL: expand15:
; KNL32:       # %bb.0:
; KNL32-NEXT:    vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
; KNL32-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL32-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL32-NEXT:    retl
   %addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0>
   %res = shufflevector <4 x float> %addV, <4 x float> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 0, i32 5, i32 0, i32 0, i32 0>
   ret <8 x float> %res
}


; Shuffle-to-blend tests.

; Alternating byte blend of %A and %W (mask 0xAAAA...AA selects %A in odd lanes).
define <64 x i8> @test_mm512_mask_blend_epi8(<64 x i8> %A, <64 x i8> %W){
; SKX64-LABEL: test_mm512_mask_blend_epi8:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movabsq $-6148914691236517206, %rax # imm = 0xAAAAAAAAAAAAAAAA
; SKX64-NEXT:    kmovq %rax, %k1
; SKX64-NEXT:    vpblendmb %zmm0, %zmm1, %zmm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm512_mask_blend_epi8:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; KNL64-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; KNL64-NEXT:    vpblendvb %ymm4, %ymm3, %ymm1, %ymm1
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm512_mask_blend_epi8:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
; SKX32-NEXT:    kmovd %eax, %k0
; SKX32-NEXT:    kunpckdq %k0, %k0, %k1
; SKX32-NEXT:    vpblendmb %zmm0, %zmm1, %zmm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm512_mask_blend_epi8:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    pushl %ebp
; KNL32-NEXT:    .cfi_def_cfa_offset 8
; KNL32-NEXT:    .cfi_offset %ebp, -8
; KNL32-NEXT:    movl %esp, %ebp
; KNL32-NEXT:    .cfi_def_cfa_register %ebp
; KNL32-NEXT:    andl $-32, %esp
; KNL32-NEXT:    subl $32, %esp
; KNL32-NEXT:    vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; KNL32-NEXT:    vpblendvb %ymm3, %ymm2, %ymm0, %ymm0
; KNL32-NEXT:    vpblendvb %ymm3, 8(%ebp), %ymm1, %ymm1
; KNL32-NEXT:    movl %ebp, %esp
; KNL32-NEXT:    popl %ebp
; KNL32-NEXT:    .cfi_def_cfa %esp, 4
; KNL32-NEXT:    retl
entry:
  %0 = shufflevector <64 x i8> %A, <64 x i8> %W, <64 x i32>  <i32 64, i32 1, i32 66, i32 3, i32 68, i32 5, i32 70, i32 7, i32 72, i32 9, i32 74, i32 11, i32 76, i32 13, i32 78, i32 15, i32 80, i32 17, i32 82, i32 19, i32 84, i32 21, i32 86, i32 23, i32 88, i32 25, i32 90, i32 27, i32 92, i32 29, i32 94, i32 31, i32 96, i32 33, i32 98, i32 35, i32 100, i32 37, i32 102, i32 39, i32 104, i32 41, i32 106, i32 43, i32 108, i32 45, i32 110, i32 47, i32 112, i32 49, i32 114, i32 51, i32 116, i32 53, i32 118, i32 55, i32 120, i32 57, i32 122, i32 59, i32 124, i32 61, i32 126, i32 63>
  ret <64 x i8> %0
}
; Alternating word blend of %A and %W (mask 0xAAAAAAAA selects %A in odd lanes).
define <32 x i16> @test_mm512_mask_blend_epi16(<32 x i16> %A, <32 x i16> %W){
; SKX64-LABEL: test_mm512_mask_blend_epi16:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm512_mask_blend_epi16:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3],ymm2[4],ymm0[5],ymm2[6],ymm0[7],ymm2[8],ymm0[9],ymm2[10],ymm0[11],ymm2[12],ymm0[13],ymm2[14],ymm0[15]
; KNL64-NEXT:    vpblendw {{.*#+}} ymm1 = ymm3[0],ymm1[1],ymm3[2],ymm1[3],ymm3[4],ymm1[5],ymm3[6],ymm1[7],ymm3[8],ymm1[9],ymm3[10],ymm1[11],ymm3[12],ymm1[13],ymm3[14],ymm1[15]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm512_mask_blend_epi16:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm512_mask_blend_epi16:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    pushl %ebp
; KNL32-NEXT:    .cfi_def_cfa_offset 8
; KNL32-NEXT:    .cfi_offset %ebp, -8
; KNL32-NEXT:    movl %esp, %ebp
; KNL32-NEXT:    .cfi_def_cfa_register %ebp
; KNL32-NEXT:    andl $-32, %esp
; KNL32-NEXT:    subl $32, %esp
; KNL32-NEXT:    vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3],ymm2[4],ymm0[5],ymm2[6],ymm0[7],ymm2[8],ymm0[9],ymm2[10],ymm0[11],ymm2[12],ymm0[13],ymm2[14],ymm0[15]
; KNL32-NEXT:    vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15]
; KNL32-NEXT:    movl %ebp, %esp
; KNL32-NEXT:    popl %ebp
; KNL32-NEXT:    .cfi_def_cfa %esp, 4
; KNL32-NEXT:    retl
entry:
  %0 = shufflevector <32 x i16> %A, <32 x i16> %W, <32 x i32>  <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
  ret <32 x i16> %0
}
; Alternating dword blend of %A and %W (mask 0xAAAA selects %A in odd lanes).
define <16 x i32> @test_mm512_mask_blend_epi32(<16 x i32> %A, <16 x i32> %W){
; SKX64-LABEL: test_mm512_mask_blend_epi32:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm512_mask_blend_epi32:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm512_mask_blend_epi32:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm512_mask_blend_epi32:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL32-NEXT:    retl
entry:
  %0 = shufflevector <16 x i32> %A, <16 x i32> %W, <16 x i32>  <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x i32> %0
}
; Alternating qword blend of %A and %W (mask 0xAA selects %A in odd lanes).
define <8 x i64> @test_mm512_mask_blend_epi64(<8 x i64> %A, <8 x i64> %W){
; SKX64-LABEL: test_mm512_mask_blend_epi64:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movb $-86, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm512_mask_blend_epi64:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    movb $-86, %al
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm512_mask_blend_epi64:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movb $-86, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm512_mask_blend_epi64:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    movb $-86, %al
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; KNL32-NEXT:    retl
entry:
  %0 = shufflevector <8 x i64> %A, <8 x i64> %W, <8 x i32>  <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x i64> %0
}
; Alternating float blend of %A and %W (mask 0xAAAA selects %A in odd lanes).
define <16 x float> @test_mm512_mask_blend_ps(<16 x float> %A, <16 x float> %W){
; SKX64-LABEL: test_mm512_mask_blend_ps:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm512_mask_blend_ps:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm512_mask_blend_ps:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm512_mask_blend_ps:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL32-NEXT:    retl
entry:
  %0 = shufflevector <16 x float> %A, <16 x float> %W, <16 x i32>  <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x float> %0
}
; Double blend of %A and %W with mask 0xA8 (lanes 3,5,7 from %A; note lanes 0-2 come from %W).
define <8 x double> @test_mm512_mask_blend_pd(<8 x double> %A, <8 x double> %W){
; SKX64-LABEL: test_mm512_mask_blend_pd:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movb $-88, %al
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm512_mask_blend_pd:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    movb $-88, %al
; KNL64-NEXT:    kmovw %eax, %k1
; KNL64-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm512_mask_blend_pd:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movb $-88, %al
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm512_mask_blend_pd:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    movb $-88, %al
; KNL32-NEXT:    kmovw %eax, %k1
; KNL32-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; KNL32-NEXT:    retl
entry:
  %0 = shufflevector <8 x double> %A, <8 x double> %W, <8 x i32>  <i32 8, i32 9, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x double> %0
}
799
800
; 256-bit byte blend: even bytes from %W (indices 32+), odd bytes from %A.
; SKX (AVX-512BW) uses a k-register blend (vpblendmb, mask 0xAAAAAAAA);
; KNL falls back to AVX2 vpblendvb with a constant byte mask, presumably
; because KNL lacks AVX-512BW — the checks confirm the two lowerings.
define <32 x i8> @test_mm256_mask_blend_epi8(<32 x i8> %A, <32 x i8> %W){
; SKX64-LABEL: test_mm256_mask_blend_epi8:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm256_mask_blend_epi8:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; KNL64-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm256_mask_blend_epi8:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movl $-1431655766, %eax # imm = 0xAAAAAAAA
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm256_mask_blend_epi8:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; KNL32-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; KNL32-NEXT:    retl
entry:
  ; Alternating byte mask: even lanes take %W (index 32+i), odd lanes take %A.
  %0 = shufflevector <32 x i8> %A, <32 x i8> %W, <32 x i32>  <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
  ret <32 x i8> %0
}
831
; 128-bit byte blend, same alternating pattern as the 256-bit test above:
; even bytes from %W (indices 16+), odd bytes from %A.  SKX emits vpblendmb
; with k-mask 0xAAAA; KNL uses a constant-mask vpblendvb instead (no
; AVX-512BW byte blend available there — see the KNL check lines).
define <16 x i8> @test_mm_mask_blend_epi8(<16 x i8> %A, <16 x i8> %W){
; SKX64-LABEL: test_mm_mask_blend_epi8:
; SKX64:       # %bb.0: # %entry
; SKX64-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX64-NEXT:    kmovd %eax, %k1
; SKX64-NEXT:    vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_mm_mask_blend_epi8:
; KNL64:       # %bb.0: # %entry
; KNL64-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; KNL64-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_mm_mask_blend_epi8:
; SKX32:       # %bb.0: # %entry
; SKX32-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX32-NEXT:    kmovd %eax, %k1
; SKX32-NEXT:    vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_mm_mask_blend_epi8:
; KNL32:       # %bb.0: # %entry
; KNL32-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; KNL32-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; KNL32-NEXT:    retl
entry:
  ; Even lanes from %W (index 16+i), odd lanes from %A.
  %0 = shufflevector <16 x i8> %A, <16 x i8> %W, <16 x i32>  <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x i8> %0
}
862
863; PR34370
; Regression test for PR34370: a load + shuffle + constant-mask select should
; fold into a single two-source permute where possible.  The select keeps the
; shuffled load in lanes 0,1,2,4,5 and %vec2 in lanes 3,6,7; SKX folds this
; to one vpermi2ps whose index vector uses entries >= 8 (11,14,15) to pull
; the %vec2 lanes.  KNL (no VL) instead uses vpermilps+vpermpd+vblendps.
define <8 x float> @test_masked_permps_v8f32(<8 x float>* %vp, <8 x float> %vec2) {
; SKX64-LABEL: test_masked_permps_v8f32:
; SKX64:       # %bb.0:
; SKX64-NEXT:    vmovaps (%rdi), %ymm2
; SKX64-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,3,11,7,6,14,15]
; SKX64-NEXT:    vpermi2ps %ymm0, %ymm2, %ymm1
; SKX64-NEXT:    vmovaps %ymm1, %ymm0
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_masked_permps_v8f32:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,2,3,7,6,6,7]
; KNL64-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,0,2,3]
; KNL64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5],ymm0[6,7]
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_masked_permps_v8f32:
; SKX32:       # %bb.0:
; SKX32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX32-NEXT:    vmovaps (%eax), %ymm2
; SKX32-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,3,11,7,6,14,15]
; SKX32-NEXT:    vpermi2ps %ymm0, %ymm2, %ymm1
; SKX32-NEXT:    vmovaps %ymm1, %ymm0
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_masked_permps_v8f32:
; KNL32:       # %bb.0:
; KNL32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL32-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,2,3,7,6,6,7]
; KNL32-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,0,2,3]
; KNL32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5],ymm0[6,7]
; KNL32-NEXT:    retl
  %vec = load <8 x float>, <8 x float>* %vp
  ; Repeating [7,6,3,0] pattern over both 128-bit halves of the loaded vector.
  %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 3, i32 0, i32 7, i32 6, i32 3, i32 0>
  ; Constant mask: %shuf in lanes 0,1,2,4,5; %vec2 in lanes 3,6,7.
  %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0>, <8 x float> %shuf, <8 x float> %vec2
  ret <8 x float> %res
}
901
; 512-bit companion to test_masked_permps_v8f32 (PR34370).  Here all four
; targets can use zmm vpermi2ps, so SKX and KNL produce identical code: the
; load + shuffle + select folds into a single two-source permute whose index
; entries >= 16 (19,22,23,27,29,31) pull the %vec2 lanes selected by the
; zero bits of the select mask.
define <16 x float> @test_masked_permps_v16f32(<16 x float>* %vp, <16 x float> %vec2) {
; SKX64-LABEL: test_masked_permps_v16f32:
; SKX64:       # %bb.0:
; SKX64-NEXT:    vmovaps (%rdi), %zmm2
; SKX64-NEXT:    vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31]
; SKX64-NEXT:    vpermi2ps %zmm0, %zmm2, %zmm1
; SKX64-NEXT:    vmovaps %zmm1, %zmm0
; SKX64-NEXT:    retq
;
; KNL64-LABEL: test_masked_permps_v16f32:
; KNL64:       # %bb.0:
; KNL64-NEXT:    vmovaps (%rdi), %zmm2
; KNL64-NEXT:    vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31]
; KNL64-NEXT:    vpermi2ps %zmm0, %zmm2, %zmm1
; KNL64-NEXT:    vmovaps %zmm1, %zmm0
; KNL64-NEXT:    retq
;
; SKX32-LABEL: test_masked_permps_v16f32:
; SKX32:       # %bb.0:
; SKX32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX32-NEXT:    vmovaps (%eax), %zmm2
; SKX32-NEXT:    vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31]
; SKX32-NEXT:    vpermi2ps %zmm0, %zmm2, %zmm1
; SKX32-NEXT:    vmovaps %zmm1, %zmm0
; SKX32-NEXT:    retl
;
; KNL32-LABEL: test_masked_permps_v16f32:
; KNL32:       # %bb.0:
; KNL32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL32-NEXT:    vmovaps (%eax), %zmm2
; KNL32-NEXT:    vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31]
; KNL32-NEXT:    vpermi2ps %zmm0, %zmm2, %zmm1
; KNL32-NEXT:    vmovaps %zmm1, %zmm0
; KNL32-NEXT:    retl
  %vec = load <16 x float>, <16 x float>* %vp
  ; Descending odd/even picks in the low half, repeating [7,6,3,0] in the high half.
  %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 13, i32 11, i32 9, i32 14, i32 12, i32 10, i32 8, i32 7, i32 6, i32 3, i32 0, i32 7, i32 6, i32 3, i32 0>
  ; Zero bits of this mask (lanes 3,6,7,11,13,15) take %vec2; the rest take %shuf.
  %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, <16 x float> %shuf, <16 x float> %vec2
  ret <16 x float> %res
}
941