; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX

; This test is a collection of AVX-512 instructions used to check their scheduling information.

; 512-bit packed double add of two register operands.
; The checks expect a single vaddpd on zmm registers and pin the printed
; scheduling annotation for both the generic and the SKX model.
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; GENERIC-LABEL: addpd512:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: addpd512:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

; 512-bit packed double add with a constant vector operand.
; The checks expect the constant to be folded into vaddpd as a RIP-relative
; memory operand rather than loaded separately.
define <8 x double> @addpd512fold(<8 x double> %y) {
; GENERIC-LABEL: addpd512fold:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: addpd512fold:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

; 512-bit packed float add of two register operands.
; The checks expect a single vaddps on zmm registers.
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; GENERIC-LABEL: addps512:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vaddps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: addps512:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

; 512-bit packed float add with a constant vector operand.
; The checks expect the constant to be folded into vaddps as a RIP-relative
; memory operand.
define <16 x float> @addps512fold(<16 x float> %y) {
; GENERIC-LABEL: addps512fold:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: addps512fold:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}

; 512-bit packed double subtract (%x - %y) of two register operands.
; The checks expect a single vsubpd on zmm registers.
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; GENERIC-LABEL: subpd512:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vsubpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: subpd512:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

; 512-bit packed double subtract whose subtrahend is loaded from %x.
; The checks expect the load to be folded into vsubpd as a (%rdi) memory
; operand.
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; GENERIC-LABEL: subpd512fold:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vsubpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: subpd512fold:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vsubpd (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %tmp2 = load <8 x double>, <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

; 512-bit packed float subtract (%x - %y) of two register operands.
; The checks expect a single vsubps on zmm registers.
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; GENERIC-LABEL: subps512:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vsubps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: subps512:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

; 512-bit packed float subtract whose subtrahend is loaded from %x.
; The checks expect the load to be folded into vsubps as a (%rdi) memory
; operand.
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; GENERIC-LABEL: subps512fold:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vsubps (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: subps512fold:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vsubps (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %tmp2 = load <16 x float>, <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}

; 512-bit multiply of <8 x i64> vectors.
; The checks expect a single vpmullq on zmm registers.
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; GENERIC-LABEL: imulq512:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmullq %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: imulq512:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}

; 256-bit multiply of <4 x i64> vectors.
; The checks expect a single vpmullq on ymm registers.
define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; GENERIC-LABEL: imulq256:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmullq %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: imulq256:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %z = mul <4 x i64>%x, %y
  ret <4 x i64>%z
}

; 128-bit multiply of <2 x i64> vectors.
; The checks expect a single vpmullq on xmm registers.
define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; GENERIC-LABEL: imulq128:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmullq %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: imulq128:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %z = mul <2 x i64>%x, %y
  ret <2 x i64>%z
}

; 512-bit packed double multiply of two register operands.
; The checks expect a single vmulpd on zmm registers.
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; GENERIC-LABEL: mulpd512:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vmulpd %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mulpd512:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

; 512-bit packed double multiply by a constant vector.
; The checks expect the constant to be folded into vmulpd as a RIP-relative
; memory operand.
define <8 x double> @mulpd512fold(<8 x double> %y) {
; GENERIC-LABEL: mulpd512fold:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mulpd512fold:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

; 512-bit packed float multiply of two register operands.
; The checks expect a single vmulps on zmm registers.
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; GENERIC-LABEL: mulps512:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mulps512:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

; 512-bit packed float multiply by a constant vector.
; The checks expect the constant to be folded into vmulps as a RIP-relative
; memory operand.
define <16 x float> @mulps512fold(<16 x float> %y) {
; GENERIC-LABEL: mulps512fold:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mulps512fold:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}

; 512-bit packed double divide (%x / %y) of two register operands.
; The checks expect a single vdivpd on zmm registers.
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; GENERIC-LABEL: divpd512:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vdivpd %zmm0, %zmm1, %zmm0 # sched: [45:44.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: divpd512:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vdivpd %zmm0, %zmm1, %zmm0 # sched: [23:16.00]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

; 512-bit packed double divide by a constant vector.
; The checks expect the constant to be folded into vdivpd as a RIP-relative
; memory operand.
define <8 x double> @divpd512fold(<8 x double> %y) {
; GENERIC-LABEL: divpd512fold:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [52:44.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: divpd512fold:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [30:16.00]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

; 512-bit packed float divide (%x / %y) of two register operands.
; The checks expect a single vdivps on zmm registers.
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; GENERIC-LABEL: divps512:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vdivps %zmm0, %zmm1, %zmm0 # sched: [29:28.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: divps512:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vdivps %zmm0, %zmm1, %zmm0 # sched: [18:10.00]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

; 512-bit packed float divide by a constant vector.
; The checks expect the constant to be folded into vdivps as a RIP-relative
; memory operand.
define <16 x float> @divps512fold(<16 x float> %y) {
; GENERIC-LABEL: divps512fold:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [36:28.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: divps512fold:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [25:10.00]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}

; 512-bit add of <8 x i64> register operands.
; The checks expect a single vpaddq on zmm registers.
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; GENERIC-LABEL: vpaddq_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

; 512-bit add of <8 x i64> where the second operand is loaded from %j.
; The checks expect the load to be folded into vpaddq as a (%rdi) memory
; operand.
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; GENERIC-LABEL: vpaddq_fold_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_fold_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %tmp = load <8 x i64>, <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

; 512-bit add of <8 x i64> with a splat constant (every lane is 2).
; The checks expect the constant to be used as a {1to8} broadcast memory
; operand of vpaddq.
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; GENERIC-LABEL: vpaddq_broadcast_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_broadcast_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %x = add <8 x i64> %i, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %x
}

; 512-bit add of <8 x i64> with a scalar loaded from %j and inserted into
; all eight lanes via an insertelement chain.
; The checks expect the scalar load to be folded as a (%rdi){1to8} broadcast
; memory operand of vpaddq.
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; GENERIC-LABEL: vpaddq_broadcast2_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_broadcast2_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %tmp = load i64, i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

; 512-bit add of <16 x i32> register operands.
; The checks expect a single vpaddd on zmm registers.
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; GENERIC-LABEL: vpaddd_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

; 512-bit add of <16 x i32> where the second operand is loaded from %j.
; The checks expect the load to be folded into vpaddd as a (%rdi) memory
; operand.
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; GENERIC-LABEL: vpaddd_fold_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_fold_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %tmp = load <16 x i32>, <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

; 512-bit add of <16 x i32> with a splat constant (every lane is 3).
; The checks expect the constant to be used as a {1to16} broadcast memory
; operand of vpaddd.
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; GENERIC-LABEL: vpaddd_broadcast_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_broadcast_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %x = add <16 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i32> %x
}

; Merge-masked 512-bit i32 add: lanes where %mask1 is non-zero take %i + %j,
; the remaining lanes keep %i.
; The checks expect vptestmd to build the mask in %k1 and a vpaddd with {%k1}.
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: vpaddd_mask_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_mask_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; Zero-masked 512-bit i32 add: lanes where %mask1 is non-zero take %i + %j,
; the remaining lanes are zero.
; The checks expect vptestmd to build the mask in %k1 and a vpaddd with
; {%k1} {z}.
define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: vpaddd_maskz_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_maskz_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; Merge-masked 512-bit i32 add whose second operand is loaded from %j.ptr.
; The checks expect the load to be folded into the masked vpaddd as a (%rdi)
; memory operand.
; NOTE(review): the function is marked readnone yet loads through %j.ptr;
; the unmasked vpaddd_fold_test uses plain nounwind. Confirm whether the
; attribute is intentional before regenerating the checks.
define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: vpaddd_mask_fold_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_mask_fold_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; Merge-masked 512-bit i32 add with a splat constant (every lane is 4).
; The checks expect a masked vpaddd with a {1to16} broadcast memory operand.
define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: vpaddd_mask_broadcast_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_mask_broadcast_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; Zero-masked 512-bit i32 add whose second operand is loaded from %j.ptr.
; The checks expect the load to be folded into the {%k1} {z} vpaddd as a
; (%rdi) memory operand.
; NOTE(review): the function is marked readnone yet loads through %j.ptr;
; the unmasked vpaddd_fold_test uses plain nounwind. Confirm whether the
; attribute is intentional before regenerating the checks.
define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: vpaddd_maskz_fold_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_maskz_fold_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; Zero-masked 512-bit i32 add with a splat constant (every lane is 5).
; The checks expect a {%k1} {z} vpaddd with a {1to16} broadcast memory
; operand.
define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: vpaddd_maskz_broadcast_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_maskz_broadcast_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; 512-bit subtract (%i - %j) of <8 x i64> register operands.
; The checks expect a single vpsubq on zmm registers.
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; GENERIC-LABEL: vpsubq_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpsubq_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

; 512-bit subtract (%i - %j) of <16 x i32> register operands.
; The checks expect a single vpsubd on zmm registers.
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; GENERIC-LABEL: vpsubd_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpsubd_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

; 512-bit multiply of <16 x i32> register operands.
; The checks expect a single vpmulld on zmm registers.
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; GENERIC-LABEL: vpmulld_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmulld %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpmulld_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmulld %zmm1, %zmm0, %zmm0 # sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}

; Scalar float square root through a tail call to the readnone-declared
; sqrtf function.
; The checks expect the call to be lowered to an inline vsqrtss.
declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; GENERIC-LABEL: sqrtA:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sqrtA:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

; Scalar double square root through a tail call to the readnone-declared
; sqrt function.
; The checks expect the call to be lowered to an inline vsqrtsd.
declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; GENERIC-LABEL: sqrtB:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sqrtB:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

; Scalar float square root through the llvm.sqrt.f32 intrinsic.
; The checks expect a single vsqrtss.
declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; GENERIC-LABEL: sqrtC:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sqrtC:
; SKX:       # %bb.0:
; SKX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

; 512-bit packed float square root through the llvm.sqrt.v16f32 intrinsic.
; The checks expect a single vsqrtps on a zmm register.
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; GENERIC-LABEL: sqrtD:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vsqrtps %zmm0, %zmm0 # sched: [29:28.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sqrtD:
; SKX:       # %bb.0:
; SKX-NEXT:    vsqrtps %zmm0, %zmm0 # sched: [20:12.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

; 512-bit packed double square root through the llvm.sqrt.v8f64 intrinsic.
; The checks expect a single vsqrtpd on a zmm register.
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; GENERIC-LABEL: sqrtE:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vsqrtpd %zmm0, %zmm0 # sched: [45:44.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sqrtE:
; SKX:       # %bb.0:
; SKX-NEXT:    vsqrtpd %zmm0, %zmm0 # sched: [32:24.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}

; 512-bit packed float add with a splat constant (all sixteen lanes equal).
; The checks expect the constant to be used as a {1to16} broadcast memory
; operand of vaddps.
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; GENERIC-LABEL: fadd_broadcast:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: fadd_broadcast:
; SKX:       # %bb.0:
; SKX-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

; 512-bit add of <8 x i64> with a splat constant (every lane is 2).
; The checks expect vpaddq with a {1to8} broadcast memory operand.
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; GENERIC-LABEL: addq_broadcast:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: addq_broadcast:
; SKX:       # %bb.0:
; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

; 512-bit bitwise OR of <8 x i64> with a splat constant (every lane is 2).
; The checks expect the integer OR to be emitted as vorpd with a {1to8}
; broadcast memory operand.
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; GENERIC-LABEL: orq_broadcast:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: orq_broadcast:
; SKX:       # %bb.0:
; SKX-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

; 512-bit bitwise AND of <16 x i32> where one operand is loaded from %x.
; The checks expect the integer AND to be emitted as vandps with the load
; folded as a (%rdi) memory operand.
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; GENERIC-LABEL: andd512fold:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vandps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: andd512fold:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vandps (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %a = load <16 x i32>, <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

; 512-bit bitwise AND of <8 x i64> with a scalar loaded from %ap and splatted
; to all lanes (insertelement into lane 0, then a zero-mask shufflevector).
; The checks expect vandpd with the scalar load folded as a (%rdi){1to8}
; broadcast memory operand.
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; GENERIC-LABEL: andqbrst:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: andqbrst:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %a = load i64, i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}

; Merge-masked 512-bit packed float add: lanes where %mask1 is non-zero take
; %i + %j, the remaining lanes keep %dst.
; The checks expect vptestmd to build the mask in %k1 and a vaddps writing
; %zmm0 under {%k1}.
; The signature is kept on a single line, like the other test_mask_*
; functions, so the generated check lines no longer sit inside the parameter
; list.
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: test_mask_vaddps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vaddps:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
; SKX-NEXT:    vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; Merge-masked 512-bit packed float multiply: lanes where %mask1 is non-zero
; take %i * %j, the remaining lanes keep %dst.
; The checks expect vptestmd into %k1 followed by a vmulps under {%k1}.
define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: test_mask_vmulps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [5:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vmulps:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
; SKX-NEXT:    vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; Merge-masked 512-bit packed float minimum, written as fcmp olt + select.
; Lanes where %mask1 is zero keep %dst.
; The checks expect the compare/select pair to become a single vminps under
; {%k1}.
define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: test_mask_vminps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vminps:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
; SKX-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}

; Merge-masked 512-bit packed double minimum, written as fcmp olt + select.
; The mask source is <8 x i32>, so the checks expect vptestmd on %ymm3 to
; build %k1, followed by a vminpd on zmm registers under {%k1}.
define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone {
; GENERIC-LABEL: test_mask_vminpd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test_mask_vminpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
; SKX-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}

; Merge-masked maximum of two <16 x float> vectors.
; Active lanes (nonzero %mask1 element) receive the fcmp-ogt/select maximum of
; %i and %j; inactive lanes keep %dst.
; The assertions below expect vptestmd into %k1 and a single masked vmaxps.
781define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
782; GENERIC-LABEL: test_mask_vmaxps:
783; GENERIC:       # %bb.0:
784; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
785; GENERIC-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
786; GENERIC-NEXT:    retq # sched: [1:1.00]
787;
788; SKX-LABEL: test_mask_vmaxps:
789; SKX:       # %bb.0:
790; SKX-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
791; SKX-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
792; SKX-NEXT:    retq # sched: [7:1.00]
793  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
794  %cmp_res = fcmp ogt <16 x float> %i, %j
795  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
796  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
797  ret <16 x float> %r
798}
799
; Merge-masked maximum of two <8 x double> vectors.
; The mask source is <8 x i32>; the assertions expect vptestmd on a ymm
; register to build %k1 and then one vmaxpd predicated on %k1.
; Inactive lanes keep %dst.
800define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone {
801; GENERIC-LABEL: test_mask_vmaxpd:
802; GENERIC:       # %bb.0:
803; GENERIC-NEXT:    vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
804; GENERIC-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
805; GENERIC-NEXT:    retq # sched: [1:1.00]
806;
807; SKX-LABEL: test_mask_vmaxpd:
808; SKX:       # %bb.0:
809; SKX-NEXT:    vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
810; SKX-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
811; SKX-NEXT:    retq # sched: [7:1.00]
812  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
813  %cmp_res = fcmp ogt <8 x double> %i, %j
814  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
815  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
816  ret <8 x double> %r
817}
818
; Merge-masked subtraction of <16 x float> vectors: active lanes receive
; %i - %j, inactive lanes keep %dst.
; The assertions below expect vptestmd into %k1 and a single masked vsubps.
819define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
820; GENERIC-LABEL: test_mask_vsubps:
821; GENERIC:       # %bb.0:
822; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
823; GENERIC-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
824; GENERIC-NEXT:    retq # sched: [1:1.00]
825;
826; SKX-LABEL: test_mask_vsubps:
827; SKX:       # %bb.0:
828; SKX-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
829; SKX-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
830; SKX-NEXT:    retq # sched: [7:1.00]
831  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
832  %x = fsub <16 x float> %i, %j
833  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
834  ret <16 x float> %r
835}
836
; Merge-masked division of <16 x float> vectors: active lanes receive
; %i / %j, inactive lanes keep %dst.
; The assertions below expect vptestmd into %k1 and a single masked vdivps.
837define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
838; GENERIC-LABEL: test_mask_vdivps:
839; GENERIC:       # %bb.0:
840; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
841; GENERIC-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [29:28.00]
842; GENERIC-NEXT:    retq # sched: [1:1.00]
843;
844; SKX-LABEL: test_mask_vdivps:
845; SKX:       # %bb.0:
846; SKX-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
847; SKX-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [18:10.00]
848; SKX-NEXT:    retq # sched: [7:1.00]
849  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
850  %x = fdiv <16 x float> %i, %j
851  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
852  ret <16 x float> %r
853}
854
; Merge-masked addition of <8 x double> vectors: active lanes receive
; %i + %j, inactive lanes keep %dst.
; The mask source is <8 x i64>, so the assertions expect vptestmq (quadword
; test) to build %k1, followed by a single masked vaddpd.
855define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone {
856; GENERIC-LABEL: test_mask_vaddpd:
857; GENERIC:       # %bb.0:
858; GENERIC-NEXT:    vptestmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
859; GENERIC-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
860; GENERIC-NEXT:    retq # sched: [1:1.00]
861;
862; SKX-LABEL: test_mask_vaddpd:
863; SKX:       # %bb.0:
864; SKX-NEXT:    vptestmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
865; SKX-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
866; SKX-NEXT:    retq # sched: [7:1.00]
867  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
868  %x = fadd <8 x double> %i, %j
869  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
870  ret <8 x double> %r
871}
872
; Zero-masked addition of <8 x double> vectors: active lanes receive
; %i + %j, inactive lanes are set to zero (select against zeroinitializer).
; The assertions below expect vptestmq into %k1 and a vaddpd carrying both
; the %k1 predicate and the {z} zeroing modifier.
873define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone {
874; GENERIC-LABEL: test_maskz_vaddpd:
875; GENERIC:       # %bb.0:
876; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
877; GENERIC-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
878; GENERIC-NEXT:    retq # sched: [1:1.00]
879;
880; SKX-LABEL: test_maskz_vaddpd:
881; SKX:       # %bb.0:
882; SKX-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
883; SKX-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.50]
884; SKX-NEXT:    retq # sched: [7:1.00]
885  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
886  %x = fadd <8 x double> %i, %j
887  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer ; inactive lanes become zero
888  ret <8 x double> %r
889}
890
; Merge-masked addition where the second addend is loaded from the pointer %j.
; Active lanes receive %i + load(%j), inactive lanes keep %dst.
; The assertions below expect the load to be folded into the masked vaddpd as
; a (%rdi) memory operand rather than emitted as a separate move.
891define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double>* %j,  <8 x i64> %mask1) nounwind {
892; GENERIC-LABEL: test_mask_fold_vaddpd:
893; GENERIC:       # %bb.0:
894; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
895; GENERIC-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00]
896; GENERIC-NEXT:    retq # sched: [1:1.00]
897;
898; SKX-LABEL: test_mask_fold_vaddpd:
899; SKX:       # %bb.0:
900; SKX-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
901; SKX-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [11:0.50]
902; SKX-NEXT:    retq # sched: [7:1.00]
903  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
904  %tmp = load <8 x double>, <8 x double>* %j, align 8
905  %x = fadd <8 x double> %i, %tmp
906  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
907  ret <8 x double> %r
908}
909
; Zero-masked addition where the second addend is loaded from the pointer %j.
; Active lanes receive %i + load(%j), inactive lanes are set to zero.
; The assertions below expect the load folded into the vaddpd as a (%rdi)
; memory operand, with both the %k1 predicate and the {z} modifier.
910define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind {
911; GENERIC-LABEL: test_maskz_fold_vaddpd:
912; GENERIC:       # %bb.0:
913; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
914; GENERIC-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00]
915; GENERIC-NEXT:    retq # sched: [1:1.00]
916;
917; SKX-LABEL: test_maskz_fold_vaddpd:
918; SKX:       # %bb.0:
919; SKX-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
920; SKX-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50]
921; SKX-NEXT:    retq # sched: [7:1.00]
922  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
923  %tmp = load <8 x double>, <8 x double>* %j, align 8
924  %x = fadd <8 x double> %i, %tmp
925  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
926  ret <8 x double> %r
927}
928
; Unmasked addition of %i with a scalar double loaded from %j and splatted
; to all eight lanes (insertelement into lane 0, then shufflevector).
; The assertions below expect the load and splat to be folded into vaddpd as
; a (%rdi){1to8} embedded-broadcast memory operand.
929define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
930; GENERIC-LABEL: test_broadcast_vaddpd:
931; GENERIC:       # %bb.0:
932; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [10:1.00]
933; GENERIC-NEXT:    retq # sched: [1:1.00]
934;
935; SKX-LABEL: test_broadcast_vaddpd:
936; SKX:       # %bb.0:
937; SKX-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [11:0.50]
938; SKX-NEXT:    retq # sched: [7:1.00]
939  %tmp = load double, double* %j
940  %b = insertelement <8 x double> undef, double %tmp, i32 0
941  %c = shufflevector <8 x double> %b, <8 x double> undef,
942                     <8 x i32> zeroinitializer ; all-zero shuffle mask copies element 0 to every lane
943  %x = fadd <8 x double> %c, %i
944  ret <8 x double> %x
945}
946
; Merge-masked addition of %i with a scalar double loaded from %j and
; splatted to all eight lanes.
; NOTE(review): the final select uses %i, not %dst, as the value for inactive
; lanes, and %dst is never read in this body. The assertions match that: the
; masked vaddpd merges into %zmm1 (the register holding %i) and a vmovapd then
; copies the result to %zmm0. Confirm this is intentional before changing it,
; since the autogenerated assertions would have to be regenerated.
947define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j, <8 x i64> %mask1) nounwind {
948; GENERIC-LABEL: test_mask_broadcast_vaddpd:
949; GENERIC:       # %bb.0:
950; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
951; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [10:1.00]
952; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
953; GENERIC-NEXT:    retq # sched: [1:1.00]
954;
955; SKX-LABEL: test_mask_broadcast_vaddpd:
956; SKX:       # %bb.0:
957; SKX-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
958; SKX-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [11:0.50]
959; SKX-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:0.33]
960; SKX-NEXT:    retq # sched: [7:1.00]
961  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
962  %tmp = load double, double* %j
963  %b = insertelement <8 x double> undef, double %tmp, i32 0
964  %c = shufflevector <8 x double> %b, <8 x double> undef,
965                     <8 x i32> zeroinitializer
966  %x = fadd <8 x double> %c, %i
967  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i ; passthrough is %i, not %dst
968  ret <8 x double> %r
969}
970
; Zero-masked addition of %i with a scalar double loaded from %j and splatted
; to all eight lanes; inactive lanes are set to zero.
; The assertions below expect vptestmq into %k1 and a vaddpd with a
; (%rdi){1to8} broadcast operand, the %k1 predicate and the {z} modifier.
; Note that the parameter list continues after the assertion block.
971define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
972; GENERIC-LABEL: test_maskz_broadcast_vaddpd:
973; GENERIC:       # %bb.0:
974; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
975; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00]
976; GENERIC-NEXT:    retq # sched: [1:1.00]
977;
978; SKX-LABEL: test_maskz_broadcast_vaddpd:
979; SKX:       # %bb.0:
980; SKX-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
981; SKX-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50]
982; SKX-NEXT:    retq # sched: [7:1.00]
983                                       <8 x i64> %mask1) nounwind {
984  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
985  %tmp = load double, double* %j
986  %b = insertelement <8 x double> undef, double %tmp, i32 0
987  %c = shufflevector <8 x double> %b, <8 x double> undef,
988                     <8 x i32> zeroinitializer
989  %x = fadd <8 x double> %c, %i
990  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
991  ret <8 x double> %r
992}
993
; Negation of a <16 x float> vector written as (-0.0 - %a).
; The assertions below expect this to become a single vxorps against a
; RIP-relative constant broadcast to 16 lanes ({1to16}).
994define <16 x float>  @test_fxor(<16 x float> %a) {
995; GENERIC-LABEL: test_fxor:
996; GENERIC:       # %bb.0:
997; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
998; GENERIC-NEXT:    retq # sched: [1:1.00]
999;
1000; SKX-LABEL: test_fxor:
1001; SKX:       # %bb.0:
1002; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
1003; SKX-NEXT:    retq # sched: [7:1.00]
1004
1005  %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
1006  ret <16 x float>%res
1007}
1008
; Negation of an <8 x float> vector written as (-0.0 - %a).
; The assertions below expect a single vxorps on ymm registers against a
; RIP-relative constant broadcast to 8 lanes ({1to8}).
1009define <8 x float>  @test_fxor_8f32(<8 x float> %a) {
1010; GENERIC-LABEL: test_fxor_8f32:
1011; GENERIC:       # %bb.0:
1012; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:1.00]
1013; GENERIC-NEXT:    retq # sched: [1:1.00]
1014;
1015; SKX-LABEL: test_fxor_8f32:
1016; SKX:       # %bb.0:
1017; SKX-NEXT:    vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
1018; SKX-NEXT:    retq # sched: [7:1.00]
1019  %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
1020  ret <8 x float>%res
1021}
1022
; Absolute value of an <8 x double> vector via the llvm.fabs.v8f64 intrinsic.
; The assertions below expect a single vandpd against a RIP-relative constant
; broadcast to 8 lanes ({1to8}).
; Note that the opening brace of the body follows the assertion block.
1023define <8 x double> @fabs_v8f64(<8 x double> %p)
1024; GENERIC-LABEL: fabs_v8f64:
1025; GENERIC:       # %bb.0:
1026; GENERIC-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
1027; GENERIC-NEXT:    retq # sched: [1:1.00]
1028;
1029; SKX-LABEL: fabs_v8f64:
1030; SKX:       # %bb.0:
1031; SKX-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
1032; SKX-NEXT:    retq # sched: [7:1.00]
1033{
1034  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
1035  ret <8 x double> %t
1036}
1037declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
1038
; Absolute value of a <16 x float> vector via the llvm.fabs.v16f32 intrinsic.
; The assertions below expect a single vandps against a RIP-relative constant
; broadcast to 16 lanes ({1to16}).
; Note that the opening brace of the body follows the assertion block.
1039define <16 x float> @fabs_v16f32(<16 x float> %p)
1040; GENERIC-LABEL: fabs_v16f32:
1041; GENERIC:       # %bb.0:
1042; GENERIC-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
1043; GENERIC-NEXT:    retq # sched: [1:1.00]
1044;
1045; SKX-LABEL: fabs_v16f32:
1046; SKX:       # %bb.0:
1047; SKX-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
1048; SKX-NEXT:    retq # sched: [7:1.00]
1049{
1050  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
1051  ret <16 x float> %t
1052}
1053declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
1054
; Scalar double branch on "unordered or not equal" (fcmp une):
; returns %a - %b when the predicate holds (block l1), otherwise %a + %b
; (block l2).
; The assertions below expect vucomisd followed by a jne/jnp pair, because
; the une predicate needs both the zero flag and the parity flag.
1055define double @test1(double %a, double %b) nounwind {
1056; GENERIC-LABEL: test1:
1057; GENERIC:       # %bb.0:
1058; GENERIC-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
1059; GENERIC-NEXT:    jne .LBB64_1 # sched: [1:1.00]
1060; GENERIC-NEXT:    jnp .LBB64_2 # sched: [1:1.00]
1061; GENERIC-NEXT:  .LBB64_1: # %l1
1062; GENERIC-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1063; GENERIC-NEXT:    retq # sched: [1:1.00]
1064; GENERIC-NEXT:  .LBB64_2: # %l2
1065; GENERIC-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1066; GENERIC-NEXT:    retq # sched: [1:1.00]
1067;
1068; SKX-LABEL: test1:
1069; SKX:       # %bb.0:
1070; SKX-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
1071; SKX-NEXT:    jne .LBB64_1 # sched: [1:0.50]
1072; SKX-NEXT:    jnp .LBB64_2 # sched: [1:0.50]
1073; SKX-NEXT:  .LBB64_1: # %l1
1074; SKX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1075; SKX-NEXT:    retq # sched: [7:1.00]
1076; SKX-NEXT:  .LBB64_2: # %l2
1077; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1078; SKX-NEXT:    retq # sched: [7:1.00]
1079  %tobool = fcmp une double %a, %b
1080  br i1 %tobool, label %l1, label %l2
1081
1082l1:
1083  %c = fsub double %a, %b
1084  ret double %c
1085l2:
1086  %c1 = fadd double %a, %b
1087  ret double %c1
1088}
1089
; Scalar float branch on ordered less-than (fcmp olt):
; returns %a - %b when %a < %b (block l1), otherwise %a + %b (block l2).
; The assertions below expect vucomiss with its operands swapped relative to
; the IR, followed by a single jbe to the l2 block.
1090define float @test2(float %a, float %b) nounwind {
1091; GENERIC-LABEL: test2:
1092; GENERIC:       # %bb.0:
1093; GENERIC-NEXT:    vucomiss %xmm0, %xmm1 # sched: [2:1.00]
1094; GENERIC-NEXT:    jbe .LBB65_2 # sched: [1:1.00]
1095; GENERIC-NEXT:  # %bb.1: # %l1
1096; GENERIC-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1097; GENERIC-NEXT:    retq # sched: [1:1.00]
1098; GENERIC-NEXT:  .LBB65_2: # %l2
1099; GENERIC-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1100; GENERIC-NEXT:    retq # sched: [1:1.00]
1101;
1102; SKX-LABEL: test2:
1103; SKX:       # %bb.0:
1104; SKX-NEXT:    vucomiss %xmm0, %xmm1 # sched: [2:1.00]
1105; SKX-NEXT:    jbe .LBB65_2 # sched: [1:0.50]
1106; SKX-NEXT:  # %bb.1: # %l1
1107; SKX-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1108; SKX-NEXT:    retq # sched: [7:1.00]
1109; SKX-NEXT:  .LBB65_2: # %l2
1110; SKX-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1111; SKX-NEXT:    retq # sched: [7:1.00]
1112  %tobool = fcmp olt float %a, %b
1113  br i1 %tobool, label %l1, label %l2
1114
1115l1:
1116  %c = fsub float %a, %b
1117  ret float %c
1118l2:
1119  %c1 = fadd float %a, %b
1120  ret float %c1
1121}
1122
; Returns the ordered-equal comparison of two floats zero-extended to i32.
; The assertions below expect the compare to be done with vcmpeqss writing
; mask register %k0, and the result moved to %eax with kmovw.
1123define i32 @test3(float %a, float %b) {
1124; GENERIC-LABEL: test3:
1125; GENERIC:       # %bb.0:
1126; GENERIC-NEXT:    vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00]
1127; GENERIC-NEXT:    kmovw %k0, %eax # sched: [1:0.33]
1128; GENERIC-NEXT:    retq # sched: [1:1.00]
1129;
1130; SKX-LABEL: test3:
1131; SKX:       # %bb.0:
1132; SKX-NEXT:    vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00]
1133; SKX-NEXT:    kmovw %k0, %eax # sched: [3:1.00]
1134; SKX-NEXT:    retq # sched: [7:1.00]
1135
1136  %cmp10.i = fcmp oeq float %a, %b
1137  %conv11.i = zext i1 %cmp10.i to i32
1138  ret i32 %conv11.i
1139}
1140
; Two-stage float compare against 0.0 with a phi merge:
;   - if %p == 0.0 (ordered), control goes straight to %return and %p is
;     returned unchanged;
;   - otherwise %if.end selects 1.0 when %p > 0.0 and -1.0 when it is not.
; The function carries attribute group #0, whose definition lies outside
; this part of the file.
1141define float @test5(float %p) #0 {
1142; GENERIC-LABEL: test5:
1143; GENERIC:       # %bb.0: # %entry
1144; GENERIC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
1145; GENERIC-NEXT:    vucomiss %xmm1, %xmm0 # sched: [2:1.00]
1146; GENERIC-NEXT:    jne .LBB67_1 # sched: [1:1.00]
1147; GENERIC-NEXT:    jp .LBB67_1 # sched: [1:1.00]
1148; GENERIC-NEXT:  # %bb.2: # %return
1149; GENERIC-NEXT:    retq # sched: [1:1.00]
1150; GENERIC-NEXT:  .LBB67_1: # %if.end
1151; GENERIC-NEXT:    seta %al # sched: [2:1.00]
1152; GENERIC-NEXT:    movzbl %al, %eax # sched: [1:0.33]
1153; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
1154; GENERIC-NEXT:    retq # sched: [1:1.00]
1155;
1156; SKX-LABEL: test5:
1157; SKX:       # %bb.0: # %entry
1158; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
1159; SKX-NEXT:    vucomiss %xmm1, %xmm0 # sched: [2:1.00]
1160; SKX-NEXT:    jne .LBB67_1 # sched: [1:0.50]
1161; SKX-NEXT:    jp .LBB67_1 # sched: [1:0.50]
1162; SKX-NEXT:  # %bb.2: # %return
1163; SKX-NEXT:    retq # sched: [7:1.00]
1164; SKX-NEXT:  .LBB67_1: # %if.end
1165; SKX-NEXT:    seta %al # sched: [2:1.00]
1166; SKX-NEXT:    movzbl %al, %eax # sched: [1:0.25]
1167; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
1168; SKX-NEXT:    retq # sched: [7:1.00]
1169entry:
1170  %cmp = fcmp oeq float %p, 0.000000e+00
1171  br i1 %cmp, label %return, label %if.end
1172
1173if.end:                                           ; preds = %entry
1174  %cmp1 = fcmp ogt float %p, 0.000000e+00
1175  %cond = select i1 %cmp1, float 1.000000e+00, float -1.000000e+00
1176  br label %return
1177
1178return:                                           ; preds = %if.end, %entry
1179  %retval.0 = phi float [ %cond, %if.end ], [ %p, %entry ]
1180  ret float %retval.0
1181}
1182
; Returns (%a == %b) zero-extended to i32.
; The assertions below expect a plain integer sequence: xorl to clear %eax,
; cmpl on the two arguments, then sete into %al.
1183define i32 @test6(i32 %a, i32 %b) {
1184; GENERIC-LABEL: test6:
1185; GENERIC:       # %bb.0:
1186; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
1187; GENERIC-NEXT:    cmpl %esi, %edi # sched: [1:0.33]
1188; GENERIC-NEXT:    sete %al # sched: [1:0.50]
1189; GENERIC-NEXT:    retq # sched: [1:1.00]
1190;
1191; SKX-LABEL: test6:
1192; SKX:       # %bb.0:
1193; SKX-NEXT:    xorl %eax, %eax # sched: [1:0.25]
1194; SKX-NEXT:    cmpl %esi, %edi # sched: [1:0.25]
1195; SKX-NEXT:    sete %al # sched: [1:0.50]
1196; SKX-NEXT:    retq # sched: [7:1.00]
1197  %cmp = icmp eq i32 %a, %b
1198  %res = zext i1 %cmp to i32
1199  ret i32 %res
1200}
1201
; Returns the "ordered and not equal" comparison (fcmp one) of two doubles,
; zero-extended to i32.
; The assertions below expect xorl to clear %eax, vucomisd, then a single
; setne into %al.
; The function carries attribute group #2, whose definition lies outside
; this part of the file.
1202define i32 @test7(double %x, double %y) #2 {
1203; GENERIC-LABEL: test7:
1204; GENERIC:       # %bb.0: # %entry
1205; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
1206; GENERIC-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
1207; GENERIC-NEXT:    setne %al # sched: [1:0.50]
1208; GENERIC-NEXT:    retq # sched: [1:1.00]
1209;
1210; SKX-LABEL: test7:
1211; SKX:       # %bb.0: # %entry
1212; SKX-NEXT:    xorl %eax, %eax # sched: [1:0.25]
1213; SKX-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
1214; SKX-NEXT:    setne %al # sched: [1:0.50]
1215; SKX-NEXT:    retq # sched: [7:1.00]
1216entry:
1217  %0 = fcmp one double %x, %y
1218  %or = zext i1 %0 to i32
1219  ret i32 %or
1220}
1221
; Returns 1 when (%a1 == -1 and %a2 == -2147483648) or %a3 == 0; otherwise
; returns %a3.
; The assertions below expect branch-free code: notl/xorl/orl combine the two
; equality tests, and a pair of cmov instructions picks the result.
; The two prefixes expect the same instructions in a different order (notl is
; first for the skx run).
1222define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
1223; GENERIC-LABEL: test8:
1224; GENERIC:       # %bb.0:
1225; GENERIC-NEXT:    xorl $-2147483648, %esi # imm = 0x80000000
1226; GENERIC-NEXT:    # sched: [1:0.33]
1227; GENERIC-NEXT:    testl %edx, %edx # sched: [1:0.33]
1228; GENERIC-NEXT:    movl $1, %eax # sched: [1:0.33]
1229; GENERIC-NEXT:    cmovel %eax, %edx # sched: [2:0.67]
1230; GENERIC-NEXT:    notl %edi # sched: [1:0.33]
1231; GENERIC-NEXT:    orl %edi, %esi # sched: [1:0.33]
1232; GENERIC-NEXT:    cmovnel %edx, %eax # sched: [2:0.67]
1233; GENERIC-NEXT:    retq # sched: [1:1.00]
1234;
1235; SKX-LABEL: test8:
1236; SKX:       # %bb.0:
1237; SKX-NEXT:    notl %edi # sched: [1:0.25]
1238; SKX-NEXT:    xorl $-2147483648, %esi # imm = 0x80000000
1239; SKX-NEXT:    # sched: [1:0.25]
1240; SKX-NEXT:    testl %edx, %edx # sched: [1:0.25]
1241; SKX-NEXT:    movl $1, %eax # sched: [1:0.25]
1242; SKX-NEXT:    cmovel %eax, %edx # sched: [1:0.50]
1243; SKX-NEXT:    orl %edi, %esi # sched: [1:0.25]
1244; SKX-NEXT:    cmovnel %edx, %eax # sched: [1:0.50]
1245; SKX-NEXT:    retq # sched: [7:1.00]
1246  %tmp1 = icmp eq i32 %a1, -1
1247  %tmp2 = icmp eq i32 %a2, -2147483648
1248  %tmp3 = and i1 %tmp1, %tmp2
1249  %tmp4 = icmp eq i32 %a3, 0
1250  %tmp5 = or i1 %tmp3, %tmp4
1251  %res = select i1 %tmp5, i32 1, i32 %a3
1252  ret i32 %res
1253}
1254
; Branches on bit 0 of %a: returns 6 when the bit is clear (block A) and 7
; when it is set (block B).
; The assertions below expect the and/icmp pair to become a single
; testb $1, %dil followed by jne.
1255define i32 @test9(i64 %a) {
1256; GENERIC-LABEL: test9:
1257; GENERIC:       # %bb.0:
1258; GENERIC-NEXT:    testb $1, %dil # sched: [1:0.33]
1259; GENERIC-NEXT:    jne .LBB71_2 # sched: [1:1.00]
1260; GENERIC-NEXT:  # %bb.1: # %A
1261; GENERIC-NEXT:    movl $6, %eax # sched: [1:0.33]
1262; GENERIC-NEXT:    retq # sched: [1:1.00]
1263; GENERIC-NEXT:  .LBB71_2: # %B
1264; GENERIC-NEXT:    movl $7, %eax # sched: [1:0.33]
1265; GENERIC-NEXT:    retq # sched: [1:1.00]
1266;
1267; SKX-LABEL: test9:
1268; SKX:       # %bb.0:
1269; SKX-NEXT:    testb $1, %dil # sched: [1:0.25]
1270; SKX-NEXT:    jne .LBB71_2 # sched: [1:0.50]
1271; SKX-NEXT:  # %bb.1: # %A
1272; SKX-NEXT:    movl $6, %eax # sched: [1:0.25]
1273; SKX-NEXT:    retq # sched: [7:1.00]
1274; SKX-NEXT:  .LBB71_2: # %B
1275; SKX-NEXT:    movl $7, %eax # sched: [1:0.25]
1276; SKX-NEXT:    retq # sched: [7:1.00]
1277 %b = and i64 %a, 1
1278 %cmp10.i = icmp eq i64 %b, 0
1279 br i1 %cmp10.i, label %A, label %B
1280A:
1281 ret i32 6
1282B:
1283 ret i32 7
1284}
1285
; Branches on %d xor (%d or (%b == %c)): returns 6 (block if.end.i) when the
; xor is true and 5 (block if.then.i) when it is false.
; The assertions below expect the i1 logic to be done in byte registers
; (andb/sete/orb/cmpb) followed by a single je.
1286define i32 @test10(i64 %b, i64 %c, i1 %d) {
1287; GENERIC-LABEL: test10:
1288; GENERIC:       # %bb.0:
1289; GENERIC-NEXT:    movl %edx, %eax # sched: [1:0.33]
1290; GENERIC-NEXT:    andb $1, %al # sched: [1:0.33]
1291; GENERIC-NEXT:    cmpq %rsi, %rdi # sched: [1:0.33]
1292; GENERIC-NEXT:    sete %cl # sched: [1:0.50]
1293; GENERIC-NEXT:    orb %dl, %cl # sched: [1:0.33]
1294; GENERIC-NEXT:    andb $1, %cl # sched: [1:0.33]
1295; GENERIC-NEXT:    cmpb %cl, %al # sched: [1:0.33]
1296; GENERIC-NEXT:    je .LBB72_1 # sched: [1:1.00]
1297; GENERIC-NEXT:  # %bb.2: # %if.end.i
1298; GENERIC-NEXT:    movl $6, %eax # sched: [1:0.33]
1299; GENERIC-NEXT:    retq # sched: [1:1.00]
1300; GENERIC-NEXT:  .LBB72_1: # %if.then.i
1301; GENERIC-NEXT:    movl $5, %eax # sched: [1:0.33]
1302; GENERIC-NEXT:    retq # sched: [1:1.00]
1303;
1304; SKX-LABEL: test10:
1305; SKX:       # %bb.0:
1306; SKX-NEXT:    movl %edx, %eax # sched: [1:0.25]
1307; SKX-NEXT:    andb $1, %al # sched: [1:0.25]
1308; SKX-NEXT:    cmpq %rsi, %rdi # sched: [1:0.25]
1309; SKX-NEXT:    sete %cl # sched: [1:0.50]
1310; SKX-NEXT:    orb %dl, %cl # sched: [1:0.25]
1311; SKX-NEXT:    andb $1, %cl # sched: [1:0.25]
1312; SKX-NEXT:    cmpb %cl, %al # sched: [1:0.25]
1313; SKX-NEXT:    je .LBB72_1 # sched: [1:0.50]
1314; SKX-NEXT:  # %bb.2: # %if.end.i
1315; SKX-NEXT:    movl $6, %eax # sched: [1:0.25]
1316; SKX-NEXT:    retq # sched: [7:1.00]
1317; SKX-NEXT:  .LBB72_1: # %if.then.i
1318; SKX-NEXT:    movl $5, %eax # sched: [1:0.25]
1319; SKX-NEXT:    retq # sched: [7:1.00]
1320
1321  %cmp8.i = icmp eq i64 %b, %c
1322  %or1 = or i1 %d, %cmp8.i
1323  %xor1 = xor i1 %d, %or1
1324  br i1 %xor1, label %if.end.i, label %if.then.i
1325
1326if.then.i:
1327 ret i32 5
1328
1329if.end.i:
1330  ret i32 6
1331}
1332
; Signed <16 x i32> to <16 x float> conversion (sitofp).
; The assertions below expect a single vcvtdq2ps on zmm registers.
1333define <16 x float> @sitof32(<16 x i32> %a) nounwind {
1334; GENERIC-LABEL: sitof32:
1335; GENERIC:       # %bb.0:
1336; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
1337; GENERIC-NEXT:    retq # sched: [1:1.00]
1338;
1339; SKX-LABEL: sitof32:
1340; SKX:       # %bb.0:
1341; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
1342; SKX-NEXT:    retq # sched: [7:1.00]
1343  %b = sitofp <16 x i32> %a to <16 x float>
1344  ret <16 x float> %b
1345}
1346
; Signed <8 x i64> to <8 x double> conversion (sitofp).
; The assertions below expect a single vcvtqq2pd on zmm registers.
1347define <8 x double> @sltof864(<8 x i64> %a) {
1348; GENERIC-LABEL: sltof864:
1349; GENERIC:       # %bb.0:
1350; GENERIC-NEXT:    vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00]
1351; GENERIC-NEXT:    retq # sched: [1:1.00]
1352;
1353; SKX-LABEL: sltof864:
1354; SKX:       # %bb.0:
1355; SKX-NEXT:    vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50]
1356; SKX-NEXT:    retq # sched: [7:1.00]
1357  %b = sitofp <8 x i64> %a to <8 x double>
1358  ret <8 x double> %b
1359}
1360
; Signed <4 x i64> to <4 x double> conversion (sitofp).
; The assertions below expect a single vcvtqq2pd on ymm registers.
1361define <4 x double> @slto4f64(<4 x i64> %a) {
1362; GENERIC-LABEL: slto4f64:
1363; GENERIC:       # %bb.0:
1364; GENERIC-NEXT:    vcvtqq2pd %ymm0, %ymm0 # sched: [4:1.00]
1365; GENERIC-NEXT:    retq # sched: [1:1.00]
1366;
1367; SKX-LABEL: slto4f64:
1368; SKX:       # %bb.0:
1369; SKX-NEXT:    vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.50]
1370; SKX-NEXT:    retq # sched: [7:1.00]
1371  %b = sitofp <4 x i64> %a to <4 x double>
1372  ret <4 x double> %b
1373}
1374
; Signed <2 x i64> to <2 x double> conversion (sitofp).
; The assertions below expect a single vcvtqq2pd on xmm registers.
1375define <2 x double> @slto2f64(<2 x i64> %a) {
1376; GENERIC-LABEL: slto2f64:
1377; GENERIC:       # %bb.0:
1378; GENERIC-NEXT:    vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
1379; GENERIC-NEXT:    retq # sched: [1:1.00]
1380;
1381; SKX-LABEL: slto2f64:
1382; SKX:       # %bb.0:
1383; SKX-NEXT:    vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.50]
1384; SKX-NEXT:    retq # sched: [7:1.00]
1385  %b = sitofp <2 x i64> %a to <2 x double>
1386  ret <2 x double> %b
1387}
1388
; Signed <2 x i64> to <2 x float> conversion (sitofp).
; The assertions below expect a single vcvtqq2ps on xmm registers.
1389define <2 x float> @sltof2f32(<2 x i64> %a) {
1390; GENERIC-LABEL: sltof2f32:
1391; GENERIC:       # %bb.0:
1392; GENERIC-NEXT:    vcvtqq2ps %xmm0, %xmm0 # sched: [3:1.00]
1393; GENERIC-NEXT:    retq # sched: [1:1.00]
1394;
1395; SKX-LABEL: sltof2f32:
1396; SKX:       # %bb.0:
1397; SKX-NEXT:    vcvtqq2ps %xmm0, %xmm0 # sched: [5:1.00]
1398; SKX-NEXT:    retq # sched: [7:1.00]
1399  %b = sitofp <2 x i64> %a to <2 x float>
1400  ret <2 x float>%b
1401}
1402
; Signed <4 x i64> to <4 x float> conversion where the source vector is
; loaded from the pointer %a (align 8).
; The assertions below expect the load to be folded into the conversion:
; vcvtqq2psy with a (%rdi) memory operand and an xmm destination.
1403define <4 x float> @slto4f32_mem(<4 x i64>* %a) {
1404; GENERIC-LABEL: slto4f32_mem:
1405; GENERIC:       # %bb.0:
1406; GENERIC-NEXT:    vcvtqq2psy (%rdi), %xmm0 # sched: [10:1.00]
1407; GENERIC-NEXT:    retq # sched: [1:1.00]
1408;
1409; SKX-LABEL: slto4f32_mem:
1410; SKX:       # %bb.0:
1411; SKX-NEXT:    vcvtqq2psy (%rdi), %xmm0 # sched: [11:0.50]
1412; SKX-NEXT:    retq # sched: [7:1.00]
1413  %a1 = load <4 x i64>, <4 x i64>* %a, align 8
1414  %b = sitofp <4 x i64> %a1 to <4 x float>
1415  ret <4 x float>%b
1416}
1417
; <4 x double> to signed <4 x i64> conversion (fptosi).
; The assertions below expect a single truncating vcvttpd2qq on ymm registers.
1418define <4 x i64> @f64to4sl(<4 x double> %a) {
1419; GENERIC-LABEL: f64to4sl:
1420; GENERIC:       # %bb.0:
1421; GENERIC-NEXT:    vcvttpd2qq %ymm0, %ymm0 # sched: [4:1.00]
1422; GENERIC-NEXT:    retq # sched: [1:1.00]
1423;
1424; SKX-LABEL: f64to4sl:
1425; SKX:       # %bb.0:
1426; SKX-NEXT:    vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.50]
1427; SKX-NEXT:    retq # sched: [7:1.00]
1428  %b = fptosi <4 x double> %a to <4 x i64>
1429  ret <4 x i64> %b
1430}
1431
; <4 x float> to signed <4 x i64> conversion (fptosi).
; The assertions below expect a single truncating vcvttps2qq that widens from
; an xmm source to a ymm destination.
1432define <4 x i64> @f32to4sl(<4 x float> %a) {
1433; GENERIC-LABEL: f32to4sl:
1434; GENERIC:       # %bb.0:
1435; GENERIC-NEXT:    vcvttps2qq %xmm0, %ymm0 # sched: [3:1.00]
1436; GENERIC-NEXT:    retq # sched: [1:1.00]
1437;
1438; SKX-LABEL: f32to4sl:
1439; SKX:       # %bb.0:
1440; SKX-NEXT:    vcvttps2qq %xmm0, %ymm0 # sched: [7:1.00]
1441; SKX-NEXT:    retq # sched: [7:1.00]
1442  %b = fptosi <4 x float> %a to <4 x i64>
1443  ret <4 x i64> %b
1444}
1445
; Signed <4 x i64> to <4 x float> conversion (sitofp) from a register.
; The assertions below expect vcvtqq2ps narrowing from ymm to xmm, followed
; by vzeroupper before the return.
1446define <4 x float> @slto4f32(<4 x i64> %a) {
1447; GENERIC-LABEL: slto4f32:
1448; GENERIC:       # %bb.0:
1449; GENERIC-NEXT:    vcvtqq2ps %ymm0, %xmm0 # sched: [3:1.00]
1450; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
1451; GENERIC-NEXT:    retq # sched: [1:1.00]
1452;
1453; SKX-LABEL: slto4f32:
1454; SKX:       # %bb.0:
1455; SKX-NEXT:    vcvtqq2ps %ymm0, %xmm0 # sched: [7:1.00]
1456; SKX-NEXT:    vzeroupper # sched: [4:1.00]
1457; SKX-NEXT:    retq # sched: [7:1.00]
1458  %b = sitofp <4 x i64> %a to <4 x float>
1459  ret <4 x float> %b
1460}
1461
; Unsigned <4 x i64> to <4 x float> conversion (uitofp).
; The assertions below expect vcvtuqq2ps narrowing from ymm to xmm, followed
; by vzeroupper before the return.
1462define <4 x float> @ulto4f32(<4 x i64> %a) {
1463; GENERIC-LABEL: ulto4f32:
1464; GENERIC:       # %bb.0:
1465; GENERIC-NEXT:    vcvtuqq2ps %ymm0, %xmm0 # sched: [3:1.00]
1466; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
1467; GENERIC-NEXT:    retq # sched: [1:1.00]
1468;
1469; SKX-LABEL: ulto4f32:
1470; SKX:       # %bb.0:
1471; SKX-NEXT:    vcvtuqq2ps %ymm0, %xmm0 # sched: [7:1.00]
1472; SKX-NEXT:    vzeroupper # sched: [4:1.00]
1473; SKX-NEXT:    retq # sched: [7:1.00]
1474  %b = uitofp <4 x i64> %a to <4 x float>
1475  ret <4 x float> %b
1476}
1477
; Unsigned <8 x i64> to <8 x double> conversion (uitofp).
; The assertions below expect a single vcvtuqq2pd on zmm registers.
1478define <8 x double> @ulto8f64(<8 x i64> %a) {
1479; GENERIC-LABEL: ulto8f64:
1480; GENERIC:       # %bb.0:
1481; GENERIC-NEXT:    vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00]
1482; GENERIC-NEXT:    retq # sched: [1:1.00]
1483;
1484; SKX-LABEL: ulto8f64:
1485; SKX:       # %bb.0:
1486; SKX-NEXT:    vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50]
1487; SKX-NEXT:    retq # sched: [7:1.00]
1488  %b = uitofp <8 x i64> %a to <8 x double>
1489  ret <8 x double> %b
1490}
1491
; Unsigned <16 x i64> to <16 x double> conversion (uitofp).
; The 16-element vector occupies two zmm registers, so the assertions below
; expect two vcvtuqq2pd instructions, one for %zmm0 and one for %zmm1.
1492define <16 x double> @ulto16f64(<16 x i64> %a) {
1493; GENERIC-LABEL: ulto16f64:
1494; GENERIC:       # %bb.0:
1495; GENERIC-NEXT:    vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00]
1496; GENERIC-NEXT:    vcvtuqq2pd %zmm1, %zmm1 # sched: [4:1.00]
1497; GENERIC-NEXT:    retq # sched: [1:1.00]
1498;
1499; SKX-LABEL: ulto16f64:
1500; SKX:       # %bb.0:
1501; SKX-NEXT:    vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50]
1502; SKX-NEXT:    vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.50]
1503; SKX-NEXT:    retq # sched: [7:1.00]
1504  %b = uitofp <16 x i64> %a to <16 x double>
1505  ret <16 x double> %b
1506}
1507
; <16 x float> to signed <16 x i32> conversion (fptosi).
; NOTE(review): despite the "f64" in the name, the argument type is
; <16 x float> and the assertions expect the single-precision vcvttps2dq.
; The name is also used as the assertion label, so renaming it would require
; regenerating the assertions.
1508define <16 x i32> @f64to16si(<16 x float> %a) nounwind {
1509; GENERIC-LABEL: f64to16si:
1510; GENERIC:       # %bb.0:
1511; GENERIC-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00]
1512; GENERIC-NEXT:    retq # sched: [1:1.00]
1513;
1514; SKX-LABEL: f64to16si:
1515; SKX:       # %bb.0:
1516; SKX-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50]
1517; SKX-NEXT:    retq # sched: [7:1.00]
1518  %b = fptosi <16 x float> %a to <16 x i32>
1519  ret <16 x i32> %b
1520}
1521
; <16 x float> to unsigned <16 x i32> conversion (fptoui).
; The assertions below expect a single truncating vcvttps2udq on zmm
; registers.
1522define <16 x i32> @f32to16ui(<16 x float> %a) nounwind {
1523; GENERIC-LABEL: f32to16ui:
1524; GENERIC:       # %bb.0:
1525; GENERIC-NEXT:    vcvttps2udq %zmm0, %zmm0 # sched: [3:1.00]
1526; GENERIC-NEXT:    retq # sched: [1:1.00]
1527;
1528; SKX-LABEL: f32to16ui:
1529; SKX:       # %bb.0:
1530; SKX-NEXT:    vcvttps2udq %zmm0, %zmm0 # sched: [4:0.50]
1531; SKX-NEXT:    retq # sched: [7:1.00]
1532  %b = fptoui <16 x float> %a to <16 x i32>
1533  ret <16 x i32> %b
1534}
1535
; <16 x float> to unsigned <16 x i8> conversion (fptoui).
; The assertions below expect a two-step lowering: vcvttps2dq to 32-bit
; integers, then vpmovdb to truncate each dword to a byte, followed by
; vzeroupper before the return.
1536define <16 x i8> @f32to16uc(<16 x float> %f) {
1537; GENERIC-LABEL: f32to16uc:
1538; GENERIC:       # %bb.0:
1539; GENERIC-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00]
1540; GENERIC-NEXT:    vpmovdb %zmm0, %xmm0 # sched: [1:1.00]
1541; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
1542; GENERIC-NEXT:    retq # sched: [1:1.00]
1543;
1544; SKX-LABEL: f32to16uc:
1545; SKX:       # %bb.0:
1546; SKX-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50]
1547; SKX-NEXT:    vpmovdb %zmm0, %xmm0 # sched: [4:2.00]
1548; SKX-NEXT:    vzeroupper # sched: [4:1.00]
1549; SKX-NEXT:    retq # sched: [7:1.00]
1550  %res = fptoui <16 x float> %f to <16 x i8>
1551  ret <16 x i8> %res
1552}
1553
; <16 x float> to unsigned <16 x i16> conversion (fptoui).
; The assertions below expect a two-step lowering: vcvttps2dq to 32-bit
; integers, then vpmovdw to truncate each dword to a word (zmm to ymm).
1554define <16 x i16> @f32to16us(<16 x float> %f) {
1555; GENERIC-LABEL: f32to16us:
1556; GENERIC:       # %bb.0:
1557; GENERIC-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00]
1558; GENERIC-NEXT:    vpmovdw %zmm0, %ymm0 # sched: [1:1.00]
1559; GENERIC-NEXT:    retq # sched: [1:1.00]
1560;
1561; SKX-LABEL: f32to16us:
1562; SKX:       # %bb.0:
1563; SKX-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50]
1564; SKX-NEXT:    vpmovdw %zmm0, %ymm0 # sched: [4:2.00]
1565; SKX-NEXT:    retq # sched: [7:1.00]
1566  %res = fptoui <16 x float> %f to <16 x i16>
1567  ret <16 x i16> %res
1568}
1569
; <8 x float> to unsigned <8 x i32> conversion (fptoui).
; The assertions below expect a single truncating vcvttps2udq on ymm
; registers.
1570define <8 x i32> @f32to8ui(<8 x float> %a) nounwind {
1571; GENERIC-LABEL: f32to8ui:
1572; GENERIC:       # %bb.0:
1573; GENERIC-NEXT:    vcvttps2udq %ymm0, %ymm0 # sched: [3:1.00]
1574; GENERIC-NEXT:    retq # sched: [1:1.00]
1575;
1576; SKX-LABEL: f32to8ui:
1577; SKX:       # %bb.0:
1578; SKX-NEXT:    vcvttps2udq %ymm0, %ymm0 # sched: [4:0.50]
1579; SKX-NEXT:    retq # sched: [7:1.00]
1580  %b = fptoui <8 x float> %a to <8 x i32>
1581  ret <8 x i32> %b
1582}
1583
; <4 x float> to unsigned <4 x i32> conversion (fptoui).
; The assertions below expect a single truncating vcvttps2udq on xmm
; registers.
1584define <4 x i32> @f32to4ui(<4 x float> %a) nounwind {
1585; GENERIC-LABEL: f32to4ui:
1586; GENERIC:       # %bb.0:
1587; GENERIC-NEXT:    vcvttps2udq %xmm0, %xmm0 # sched: [3:1.00]
1588; GENERIC-NEXT:    retq # sched: [1:1.00]
1589;
1590; SKX-LABEL: f32to4ui:
1591; SKX:       # %bb.0:
1592; SKX-NEXT:    vcvttps2udq %xmm0, %xmm0 # sched: [4:0.50]
1593; SKX-NEXT:    retq # sched: [7:1.00]
1594  %b = fptoui <4 x float> %a to <4 x i32>
1595  ret <4 x i32> %b
1596}
1597
; fptoui <8 x double> -> <8 x i32>. The checks expect one vcvttpd2udq
; that reads a zmm source and writes a ymm result.
define <8 x i32> @f64to8ui(<8 x double> %a) nounwind {
; GENERIC-LABEL: f64to8ui:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvttpd2udq %zmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f64to8ui:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvttpd2udq %zmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %ints = fptoui <8 x double> %a to <8 x i32>
  ret <8 x i32> %ints
}
1611
; fptoui <8 x double> -> <8 x i16>. The checks expect a truncating
; double-to-dword convert, a dword-to-word narrowing (vpmovdw), and a
; vzeroupper before returning the xmm result.
define <8 x i16> @f64to8us(<8 x double> %f) {
; GENERIC-LABEL: f64to8us:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT:    vpmovdw %ymm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f64to8us:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT:    vpmovdw %ymm0, %xmm0 # sched: [4:2.00]
; SKX-NEXT:    vzeroupper # sched: [4:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %words = fptoui <8 x double> %f to <8 x i16>
  ret <8 x i16> %words
}
1629
; fptoui <8 x double> -> <8 x i8>. The checks expect the same sequence
; as the <8 x i16> case: vcvttpd2dq, then vpmovdw, then vzeroupper.
define <8 x i8> @f64to8uc(<8 x double> %f) {
; GENERIC-LABEL: f64to8uc:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT:    vpmovdw %ymm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f64to8uc:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT:    vpmovdw %ymm0, %xmm0 # sched: [4:2.00]
; SKX-NEXT:    vzeroupper # sched: [4:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %bytes = fptoui <8 x double> %f to <8 x i8>
  ret <8 x i8> %bytes
}
1647
; fptoui <4 x double> -> <4 x i32>. The checks expect vcvttpd2udq from
; ymm to xmm, followed by vzeroupper.
define <4 x i32> @f64to4ui(<4 x double> %a) nounwind {
; GENERIC-LABEL: f64to4ui:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvttpd2udq %ymm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f64to4ui:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvttpd2udq %ymm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT:    vzeroupper # sched: [4:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %ints = fptoui <4 x double> %a to <4 x i32>
  ret <4 x i32> %ints
}
1663
; sitofp <8 x i32> -> <8 x double>. The checks expect one vcvtdq2pd
; widening ymm to zmm.
define <8 x double> @sito8f64(<8 x i32> %a) {
; GENERIC-LABEL: sito8f64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sito8f64:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %dbl = sitofp <8 x i32> %a to <8 x double>
  ret <8 x double> %dbl
}
; Merge-masked sitofp <8 x i32> -> <8 x double>. The i8 argument %c is
; bitcast to an <8 x i1> mask; lanes with a clear bit keep the value from
; %a. The checks expect the mask to be moved into %k1 and a single masked
; vcvtdq2pd writing into zmm0.
define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
; GENERIC-LABEL: i32to8f64_mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: i32to8f64_mask:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %lanes = bitcast i8 %c to <8 x i1>
  %cvt = sitofp <8 x i32> %b to <8 x double>
  %merged = select <8 x i1> %lanes, <8 x double> %cvt, <8 x double> %a
  ret <8 x double> %merged
}
; Zero-masked sitofp <8 x i32> -> <8 x double>. The i8 argument %b is
; bitcast to an <8 x i1> mask; lanes with a clear bit become zero. The
; checks expect the mask in %k1 and a single vcvtdq2pd with {%k1} {z}.
define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
; GENERIC-LABEL: sito8f64_maskz:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sito8f64_maskz:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %lanes = bitcast i8 %b to <8 x i1>
  %cvt = sitofp <8 x i32> %a to <8 x double>
  %zeroed = select <8 x i1> %lanes, <8 x double> %cvt, <8 x double> zeroinitializer
  ret <8 x double> %zeroed
}
1721
; fptosi <8 x double> -> <8 x i32>. The checks expect one truncating
; vcvttpd2dq from zmm to ymm.
define <8 x i32> @f64to8si(<8 x double> %a) {
; GENERIC-LABEL: f64to8si:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f64to8si:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %ints = fptosi <8 x double> %a to <8 x i32>
  ret <8 x i32> %ints
}
1735
; fptosi <4 x double> -> <4 x i32>. The checks expect vcvttpd2dq from
; ymm to xmm, followed by vzeroupper.
define <4 x i32> @f64to4si(<4 x double> %a) {
; GENERIC-LABEL: f64to4si:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f64to4si:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT:    vzeroupper # sched: [4:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %ints = fptosi <4 x double> %a to <4 x i32>
  ret <4 x i32> %ints
}
1751
; fptrunc <16 x double> -> <16 x float>. The checks expect each zmm half
; to be converted with vcvtpd2ps and the two ymm results to be joined
; with vinsertf64x4.
define <16 x float> @f64to16f32(<16 x double> %b) nounwind {
; GENERIC-LABEL: f64to16f32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT:    vcvtpd2ps %zmm1, %ymm1 # sched: [4:1.00]
; GENERIC-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f64to16f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT:    vcvtpd2ps %zmm1, %ymm1 # sched: [7:1.00]
; SKX-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %narrow = fptrunc <16 x double> %b to <16 x float>
  ret <16 x float> %narrow
}
1769
; fptrunc <4 x double> -> <4 x float>. The checks expect vcvtpd2ps from
; ymm to xmm, followed by vzeroupper.
define <4 x float> @f64to4f32(<4 x double> %b) {
; GENERIC-LABEL: f64to4f32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f64to4f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT:    vzeroupper # sched: [4:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %narrow = fptrunc <4 x double> %b to <4 x float>
  ret <4 x float> %narrow
}
1785
; Zero-masked fptrunc <4 x double> -> <4 x float> under a <4 x i1> mask
; argument. The checks expect the mask vector to be shifted left by 31
; (vpslld) and moved into %k1 (vpmovd2m) before a vcvtpd2ps with
; {%k1} {z}.
define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) {
; GENERIC-LABEL: f64to4f32_mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovd2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [4:1.00]
; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f64to4f32_mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm1 # sched: [1:0.50]
; SKX-NEXT:    vpmovd2m %xmm1, %k1 # sched: [1:1.00]
; SKX-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [7:1.00]
; SKX-NEXT:    vzeroupper # sched: [4:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %narrow = fptrunc <4 x double> %b to <4 x float>
  %zeroed = select <4 x i1> %mask, <4 x float> %narrow, <4 x float> zeroinitializer
  ret <4 x float> %zeroed
}
1806
; Scalar fptrunc of element 0 of %a0, inserted into element 0 of %a1.
; The checks expect this to fold into a single register-form vcvtsd2ss.
define <4 x float> @f64tof32_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
; GENERIC-LABEL: f64tof32_inreg:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f64tof32_inreg:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %lane0 = extractelement <2 x double> %a0, i32 0
  %narrow = fptrunc double %lane0 to float
  %merged = insertelement <4 x float> %a1, float %narrow, i32 0
  ret <4 x float> %merged
}
1822
; fpext <8 x float> -> <8 x double>. The checks expect one vcvtps2pd
; widening ymm to zmm.
define <8 x double> @f32to8f64(<8 x float> %b) nounwind {
; GENERIC-LABEL: f32to8f64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f32to8f64:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %wide = fpext <8 x float> %b to <8 x double>
  ret <8 x double> %wide
}
1836
; Zero-masked fpext <4 x float> -> <4 x double>, where the mask is
; `%a1 ogt %b1`. The checks expect the compare to be emitted as a
; vcmpltpd with swapped operands writing %k1, then a vcvtps2pd with
; {%k1} {z}.
define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) {
; GENERIC-LABEL: f32to4f64_mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT:    vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f32to4f64_mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %wide = fpext <4 x float> %b to <4 x double>
  %gt = fcmp ogt <4 x double> %a1, %b1
  %zeroed = select <4 x i1> %gt, <4 x double> %wide, <4 x double> zeroinitializer
  ret <4 x double> %zeroed
}
1854
; Scalar fpext of element 0 of %a1, inserted into element 0 of %a0.
; The checks expect this to fold into a single register-form vcvtss2sd.
define <2 x double> @f32tof64_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
; GENERIC-LABEL: f32tof64_inreg:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f32tof64_inreg:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %lane0 = extractelement <4 x float> %a1, i32 0
  %wide = fpext float %lane0 to double
  %merged = insertelement <2 x double> %a0, double %wide, i32 0
  ret <2 x double> %merged
}
1870
; Load an i64 and sitofp it to double. The checks expect the load to be
; folded into the memory form of vcvtsi2sdq.
define double @sltof64_load(i64* nocapture %e) {
; GENERIC-LABEL: sltof64_load:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sltof64_load:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %val = load i64, i64* %e, align 8
  %dbl = sitofp i64 %val to double
  ret double %dbl
}
1886
; Load an i32 and sitofp it to double. The checks expect the load to be
; folded into the memory form of vcvtsi2sdl.
define double @sitof64_load(i32* %e) {
; GENERIC-LABEL: sitof64_load:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sitof64_load:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %val = load i32, i32* %e, align 4
  %dbl = sitofp i32 %val to double
  ret double %dbl
}
1902
; Load an i32 and sitofp it to float. The checks expect the load to be
; folded into the memory form of vcvtsi2ssl.
define float @sitof32_load(i32* %e) {
; GENERIC-LABEL: sitof32_load:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sitof32_load:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %val = load i32, i32* %e, align 4
  %flt = sitofp i32 %val to float
  ret float %flt
}
1918
; Load an i64 and sitofp it to float. The checks expect the load to be
; folded into the memory form of vcvtsi2ssq.
define float @sltof32_load(i64* %e) {
; GENERIC-LABEL: sltof32_load:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sltof32_load:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %val = load i64, i64* %e, align 8
  %flt = sitofp i64 %val to float
  ret float %flt
}
1934
; Load a float from one stack slot, fpext it to double and store it to
; another stack slot. The checks expect a separate vmovss load, a
; register-form vcvtss2sd, and a vmovsd store.
define void @f32tof64_loadstore() {
; GENERIC-LABEL: f32tof64_loadstore:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
; GENERIC-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f32tof64_loadstore:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %fslot = alloca float, align 4
  %dslot = alloca double, align 8
  %narrow = load float, float* %fslot, align 4
  %wide = fpext float %narrow to double
  store double %wide, double* %dslot, align 8
  ret void
}
1957
; Load a double from one stack slot, fptrunc it to float and store it to
; another stack slot. The checks expect a separate vmovsd load, a
; register-form vcvtsd2ss, and a vmovss store.
define void @f64tof32_loadstore() nounwind uwtable {
; GENERIC-LABEL: f64tof32_loadstore:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
; GENERIC-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: f64tof32_loadstore:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
; SKX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  %fslot = alloca float, align 4
  %dslot = alloca double, align 8
  %wide = load double, double* %dslot, align 8
  %narrow = fptrunc double %wide to float
  store float %narrow, float* %fslot, align 4
  ret void
}
1980
; Bitcast i64 -> double (no value conversion). The checks expect a plain
; GPR-to-XMM vmovq.
define double @long_to_double(i64 %x) {
; GENERIC-LABEL: long_to_double:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vmovq %rdi, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: long_to_double:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovq %rdi, %xmm0 # sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %bits = bitcast i64 %x to double
  ret double %bits
}
1994
; Bitcast double -> i64 (no value conversion). The checks expect a plain
; XMM-to-GPR vmovq.
define i64 @double_to_long(double %x) {
; GENERIC-LABEL: double_to_long:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: double_to_long:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %bits = bitcast double %x to i64
  ret i64 %bits
}
2008
; Bitcast i32 -> float (no value conversion). The checks expect a plain
; GPR-to-XMM vmovd.
define float @int_to_float(i32 %x) {
; GENERIC-LABEL: int_to_float:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vmovd %edi, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: int_to_float:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovd %edi, %xmm0 # sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %bits = bitcast i32 %x to float
  ret float %bits
}
2022
; Bitcast float -> i32 (no value conversion). The checks expect a plain
; XMM-to-GPR vmovd.
define i32 @float_to_int(float %x) {
; GENERIC-LABEL: float_to_int:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: float_to_int:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %bits = bitcast float %x to i32
  ret i32 %bits
}
2036
; uitofp <16 x i32> -> <16 x double>. The checks expect the low ymm half
; to be converted with vcvtudq2pd, the high half to be extracted with
; vextractf64x4 and converted the same way, and a final vmovaps placing
; the low result in zmm0.
define <16 x double> @uito16f64(<16 x i32> %a) nounwind {
; GENERIC-LABEL: uito16f64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtudq2pd %ymm0, %zmm2 # sched: [4:1.00]
; GENERIC-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    vcvtudq2pd %ymm0, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: uito16f64:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtudq2pd %ymm0, %zmm2 # sched: [7:1.00]
; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT:    vcvtudq2pd %ymm0, %zmm1 # sched: [7:1.00]
; SKX-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
  %dbl = uitofp <16 x i32> %a to <16 x double>
  ret <16 x double> %dbl
}
2056
; sitofp <8 x i64> -> <8 x float>. The checks expect one vcvtqq2ps from
; zmm to ymm.
define <8 x float> @slto8f32(<8 x i64> %a) {
; GENERIC-LABEL: slto8f32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: slto8f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %flt = sitofp <8 x i64> %a to <8 x float>
  ret <8 x float> %flt
}
2070
; sitofp <16 x i64> -> <16 x float>. The checks expect each zmm input to
; be converted with vcvtqq2ps and the two ymm results to be joined with
; vinsertf64x4.
define <16 x float> @slto16f32(<16 x i64> %a) {
; GENERIC-LABEL: slto16f32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT:    vcvtqq2ps %zmm1, %ymm1 # sched: [3:1.00]
; GENERIC-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: slto16f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT:    vcvtqq2ps %zmm1, %ymm1 # sched: [7:1.00]
; SKX-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %flt = sitofp <16 x i64> %a to <16 x float>
  ret <16 x float> %flt
}
2088
; sitofp <8 x i64> -> <8 x double>. The checks expect one in-place
; vcvtqq2pd on zmm0.
define <8 x double> @slto8f64(<8 x i64> %a) {
; GENERIC-LABEL: slto8f64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: slto8f64:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %dbl = sitofp <8 x i64> %a to <8 x double>
  ret <8 x double> %dbl
}
2102
; sitofp <16 x i64> -> <16 x double>. The checks expect one in-place
; vcvtqq2pd per zmm register (zmm0 and zmm1).
define <16 x double> @slto16f64(<16 x i64> %a) {
; GENERIC-LABEL: slto16f64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT:    vcvtqq2pd %zmm1, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: slto16f64:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT:    vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %dbl = sitofp <16 x i64> %a to <16 x double>
  ret <16 x double> %dbl
}
2118
; uitofp <8 x i64> -> <8 x float>. The checks expect one vcvtuqq2ps from
; zmm to ymm.
define <8 x float> @ulto8f32(<8 x i64> %a) {
; GENERIC-LABEL: ulto8f32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtuqq2ps %zmm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: ulto8f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %flt = uitofp <8 x i64> %a to <8 x float>
  ret <8 x float> %flt
}
2132
; uitofp <16 x i64> -> <16 x float>. The checks expect each zmm input to
; be converted with vcvtuqq2ps and the two ymm results to be joined with
; vinsertf64x4.
define <16 x float> @ulto16f32(<16 x i64> %a) {
; GENERIC-LABEL: ulto16f32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtuqq2ps %zmm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT:    vcvtuqq2ps %zmm1, %ymm1 # sched: [3:1.00]
; GENERIC-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: ulto16f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT:    vcvtuqq2ps %zmm1, %ymm1 # sched: [7:1.00]
; SKX-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %flt = uitofp <16 x i64> %a to <16 x float>
  ret <16 x float> %flt
}
2150
; Merge-masked uitofp <8 x i32> -> <8 x double>. The i8 argument %c is
; bitcast to an <8 x i1> mask; lanes with a clear bit keep the value from
; %a. The checks expect the mask to be moved into %k1 and a single masked
; vcvtudq2pd writing into zmm0.
define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
; GENERIC-LABEL: uito8f64_mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: uito8f64_mask:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
; SKX-NEXT:    vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %lanes = bitcast i8 %c to <8 x i1>
  %cvt = uitofp <8 x i32> %b to <8 x double>
  %merged = select <8 x i1> %lanes, <8 x double> %cvt, <8 x double> %a
  ret <8 x double> %merged
}
; Zero-masked uitofp <8 x i32> -> <8 x double>. The i8 argument %b is
; bitcast to an <8 x i1> mask; lanes with a clear bit become zero. The
; checks expect the mask in %k1 and a single vcvtudq2pd with {%k1} {z}.
define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
; GENERIC-LABEL: uito8f64_maskz:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: uito8f64_maskz:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
; SKX-NEXT:    vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %lanes = bitcast i8 %b to <8 x i1>
  %cvt = uitofp <8 x i32> %a to <8 x double>
  %zeroed = select <8 x i1> %lanes, <8 x double> %cvt, <8 x double> zeroinitializer
  ret <8 x double> %zeroed
}
2190
; uitofp <4 x i32> -> <4 x double>. The checks expect one vcvtudq2pd
; widening xmm to ymm.
define <4 x double> @uito4f64(<4 x i32> %a) nounwind {
; GENERIC-LABEL: uito4f64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtudq2pd %xmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: uito4f64:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtudq2pd %xmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %dbl = uitofp <4 x i32> %a to <4 x double>
  ret <4 x double> %dbl
}
2204
; uitofp <16 x i32> -> <16 x float>. The checks expect one in-place
; vcvtudq2ps on zmm0.
define <16 x float> @uito16f32(<16 x i32> %a) nounwind {
; GENERIC-LABEL: uito16f32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtudq2ps %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: uito16f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %flt = uitofp <16 x i32> %a to <16 x float>
  ret <16 x float> %flt
}
2218
; uitofp <8 x i32> -> <8 x double>. The checks expect one vcvtudq2pd
; widening ymm to zmm.
define <8 x double> @uito8f64(<8 x i32> %a) {
; GENERIC-LABEL: uito8f64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtudq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: uito8f64:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtudq2pd %ymm0, %zmm0 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %dbl = uitofp <8 x i32> %a to <8 x double>
  ret <8 x double> %dbl
}
2232
; uitofp <8 x i32> -> <8 x float>. The checks expect one in-place
; vcvtudq2ps on ymm0.
define <8 x float> @uito8f32(<8 x i32> %a) nounwind {
; GENERIC-LABEL: uito8f32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtudq2ps %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: uito8f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %flt = uitofp <8 x i32> %a to <8 x float>
  ret <8 x float> %flt
}
2246
; uitofp <4 x i32> -> <4 x float>. The checks expect one in-place
; vcvtudq2ps on xmm0.
define <4 x float> @uito4f32(<4 x i32> %a) nounwind {
; GENERIC-LABEL: uito4f32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtudq2ps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: uito4f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %flt = uitofp <4 x i32> %a to <4 x float>
  ret <4 x float> %flt
}
2260
; Scalar fptosi float -> i32. The checks expect a truncating vcvttss2si
; into eax.
define i32 @fptosi(float %a) nounwind {
; GENERIC-LABEL: fptosi:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvttss2si %xmm0, %eax # sched: [5:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: fptosi:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvttss2si %xmm0, %eax # sched: [6:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %int = fptosi float %a to i32
  ret i32 %int
}
2274
; Scalar fptoui float -> i32. The checks expect the unsigned truncating
; form, vcvttss2usi, into eax.
define i32 @fptoui(float %a) nounwind {
; GENERIC-LABEL: fptoui:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvttss2usi %xmm0, %eax # sched: [5:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: fptoui:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvttss2usi %xmm0, %eax # sched: [6:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %int = fptoui float %a to i32
  ret i32 %int
}
2288
; Scalar uitofp i32 -> float. The checks expect a register-form
; vcvtusi2ssl from edi.
define float @uitof32(i32 %a) nounwind {
; GENERIC-LABEL: uitof32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: uitof32:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %flt = uitofp i32 %a to float
  ret float %flt
}
2302
; Scalar uitofp i32 -> double. The checks expect a register-form
; vcvtusi2sdl from edi.
define double @uitof64(i32 %a) nounwind {
; GENERIC-LABEL: uitof64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: uitof64:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %dbl = uitofp i32 %a to double
  ret double %dbl
}
2316
; sitofp of a <16 x i1> produced by `icmp slt %a, 0`. The checks expect
; the compare to be emitted as vpmovd2m into %k0, the mask to be
; expanded back to dwords with vpmovm2d, and a vcvtdq2ps on the result.
define <16 x float> @sbto16f32(<16 x i32> %a) {
; GENERIC-LABEL: sbto16f32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sbto16f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:1.00]
; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %isneg = icmp slt <16 x i32> %a, zeroinitializer
  %flt = sitofp <16 x i1> %isneg to <16 x float>
  ret <16 x float> %flt
}
2335
; sitofp <16 x i8> -> <16 x float>. The checks expect a byte-to-dword
; sign extension (vpmovsxbd) followed by vcvtdq2ps.
define <16 x float> @scto16f32(<16 x i8> %a) {
; GENERIC-LABEL: scto16f32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: scto16f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00]
; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %flt = sitofp <16 x i8> %a to <16 x float>
  ret <16 x float> %flt
}
2351
; sitofp <16 x i16> -> <16 x float>. The checks expect a word-to-dword
; sign extension (vpmovsxwd) followed by vcvtdq2ps.
define <16 x float> @ssto16f32(<16 x i16> %a) {
; GENERIC-LABEL: ssto16f32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: ssto16f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00]
; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %flt = sitofp <16 x i16> %a to <16 x float>
  ret <16 x float> %flt
}
2367
; sitofp <8 x i16> -> <8 x double> (eight lanes, despite the "16" in the
; function name). The checks expect a word-to-dword sign extension
; (vpmovsxwd) followed by vcvtdq2pd.
define <8 x double> @ssto16f64(<8 x i16> %a) {
; GENERIC-LABEL: ssto16f64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: ssto16f64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %dbl = sitofp <8 x i16> %a to <8 x double>
  ret <8 x double> %dbl
}
2383
; sitofp <8 x i8> -> <8 x double>. The checks expect the argument to be
; widened from words to dwords (vpmovzxwd), sign-extended in place from
; the low byte with a shift-left/arithmetic-shift-right pair by 24, and
; then converted with vcvtdq2pd.
define <8 x double> @scto8f64(<8 x i8> %a) {
; GENERIC-LABEL: scto8f64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
; GENERIC-NEXT:    vpslld $24, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpsrad $24, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: scto8f64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
; SKX-NEXT:    vpslld $24, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT:    vpsrad $24, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %dbl = sitofp <8 x i8> %a to <8 x double>
  ret <8 x double> %dbl
}
2403
; sitofp <16 x i8> -> <16 x double>. The checks expect one vpmovsxbd to
; a full zmm of dwords, then a vcvtdq2pd on the low ymm half and, after
; vextracti64x4, another on the high half.
define <16 x double> @scto16f64(<16 x i8> %a) {
; GENERIC-LABEL: scto16f64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovsxbd %xmm0, %zmm1 # sched: [1:1.00]
; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: scto16f64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxbd %xmm0, %zmm1 # sched: [3:1.00]
; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %dbl = sitofp <16 x i8> %a to <16 x double>
  ret <16 x double> %dbl
}
2423
; sitofp of a <16 x i1> produced by `fcmp ogt %a, 0.0` on
; <16 x double>. The checks expect two vcmpltpd compares against a
; zeroed register, the two 8-bit masks to be combined with kunpckbw,
; the mask to be expanded to dwords (vpmovm2d), and each ymm half to be
; converted with vcvtdq2pd.
define <16 x double> @sbto16f64(<16 x double> %a) {
; GENERIC-LABEL: sbto16f64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
; GENERIC-NEXT:    vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00]
; GENERIC-NEXT:    vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT:    kunpckbw %k0, %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovm2d %k0, %zmm1 # sched: [1:0.33]
; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sbto16f64:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT:    vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00]
; SKX-NEXT:    vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00]
; SKX-NEXT:    kunpckbw %k0, %k1, %k0 # sched: [3:1.00]
; SKX-NEXT:    vpmovm2d %k0, %zmm1 # sched: [1:0.25]
; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %ispos = fcmp ogt <16 x double> %a, zeroinitializer
  %dbl = sitofp <16 x i1> %ispos to <16 x double>
  ret <16 x double> %dbl
}
2452
; Compares <8 x double> %a against zero (fcmp ogt) and converts the resulting
; <8 x i1> to <8 x double> with sitofp.
; Both check prefixes expect: vxorpd zeroing, vcmpltpd into mask register %k0,
; vpmovm2d expanding the mask into %ymm0, and one vcvtdq2pd to %zmm0.
2453define <8 x double> @sbto8f64(<8 x double> %a) {
2454; GENERIC-LABEL: sbto8f64:
2455; GENERIC:       # %bb.0:
2456; GENERIC-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
2457; GENERIC-NEXT:    vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00]
2458; GENERIC-NEXT:    vpmovm2d %k0, %ymm0 # sched: [1:0.33]
2459; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
2460; GENERIC-NEXT:    retq # sched: [1:1.00]
2461;
2462; SKX-LABEL: sbto8f64:
2463; SKX:       # %bb.0:
2464; SKX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2465; SKX-NEXT:    vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00]
2466; SKX-NEXT:    vpmovm2d %k0, %ymm0 # sched: [1:0.25]
2467; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2468; SKX-NEXT:    retq # sched: [7:1.00]
2469  %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
2470  %1 = sitofp <8 x i1> %cmpres to <8 x double>
2471  ret <8 x double> %1
2472}
2473
; Compares <8 x float> %a against zero (fcmp ogt) and converts the resulting
; <8 x i1> to <8 x float> with sitofp.
; Both check prefixes expect the compare result to stay in a vector register
; (vcmpltps writes %ymm0, no mask register appears) and to feed vcvtdq2ps
; directly.
2474define <8 x float> @sbto8f32(<8 x float> %a) {
2475; GENERIC-LABEL: sbto8f32:
2476; GENERIC:       # %bb.0:
2477; GENERIC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
2478; GENERIC-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2479; GENERIC-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
2480; GENERIC-NEXT:    retq # sched: [1:1.00]
2481;
2482; SKX-LABEL: sbto8f32:
2483; SKX:       # %bb.0:
2484; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2485; SKX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
2486; SKX-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50]
2487; SKX-NEXT:    retq # sched: [7:1.00]
2488  %cmpres = fcmp ogt <8 x float> %a, zeroinitializer
2489  %1 = sitofp <8 x i1> %cmpres to <8 x float>
2490  ret <8 x float> %1
2491}
2492
; Compares <4 x float> %a against zero (fcmp ogt) and converts the resulting
; <4 x i1> to <4 x float> with sitofp.
; Both check prefixes expect vxorps, a vcmpltps whose result is written to
; %xmm0, and vcvtdq2ps on that register.
2493define <4 x float> @sbto4f32(<4 x float> %a) {
2494; GENERIC-LABEL: sbto4f32:
2495; GENERIC:       # %bb.0:
2496; GENERIC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
2497; GENERIC-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
2498; GENERIC-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
2499; GENERIC-NEXT:    retq # sched: [1:1.00]
2500;
2501; SKX-LABEL: sbto4f32:
2502; SKX:       # %bb.0:
2503; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2504; SKX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
2505; SKX-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
2506; SKX-NEXT:    retq # sched: [7:1.00]
2507  %cmpres = fcmp ogt <4 x float> %a, zeroinitializer
2508  %1 = sitofp <4 x i1> %cmpres to <4 x float>
2509  ret <4 x float> %1
2510}
2511
; Compares <4 x double> %a against zero (fcmp ogt) and converts the resulting
; <4 x i1> to <4 x double> with sitofp.
; Both check prefixes expect: vxorpd zeroing, vcmpltpd into mask register %k0,
; vpmovm2d expanding the mask into %xmm0, and vcvtdq2pd from %xmm0 to %ymm0.
2512define <4 x double> @sbto4f64(<4 x double> %a) {
2513; GENERIC-LABEL: sbto4f64:
2514; GENERIC:       # %bb.0:
2515; GENERIC-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
2516; GENERIC-NEXT:    vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00]
2517; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
2518; GENERIC-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
2519; GENERIC-NEXT:    retq # sched: [1:1.00]
2520;
2521; SKX-LABEL: sbto4f64:
2522; SKX:       # %bb.0:
2523; SKX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2524; SKX-NEXT:    vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00]
2525; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
2526; SKX-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
2527; SKX-NEXT:    retq # sched: [7:1.00]
2528  %cmpres = fcmp ogt <4 x double> %a, zeroinitializer
2529  %1 = sitofp <4 x i1> %cmpres to <4 x double>
2530  ret <4 x double> %1
2531}
2532
; Compares <2 x float> %a against zero (fcmp ogt) and converts the resulting
; <2 x i1> to <2 x float> with sitofp.
; The expected instruction sequence (vxorps, vcmpltps, vcvtdq2ps on xmm
; registers) is the same one expected for sbto4f32 in this file.
2533define <2 x float> @sbto2f32(<2 x float> %a) {
2534; GENERIC-LABEL: sbto2f32:
2535; GENERIC:       # %bb.0:
2536; GENERIC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
2537; GENERIC-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
2538; GENERIC-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
2539; GENERIC-NEXT:    retq # sched: [1:1.00]
2540;
2541; SKX-LABEL: sbto2f32:
2542; SKX:       # %bb.0:
2543; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2544; SKX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
2545; SKX-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
2546; SKX-NEXT:    retq # sched: [7:1.00]
2547  %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
2548  %1 = sitofp <2 x i1> %cmpres to <2 x float>
2549  ret <2 x float> %1
2550}
2551
; Compares <2 x double> %a against zero (fcmp ogt) and converts the resulting
; <2 x i1> to <2 x double> with sitofp.
; Both check prefixes expect vxorpd, a vcmpltpd writing %xmm0, a vpermilps
; with element selection [0,2,2,3], and then vcvtdq2pd.
; NOTE(review): the vpermilps presumably packs the low dword of each 64-bit
; compare lane into the two dwords vcvtdq2pd reads - confirm against the ISA.
2552define <2 x double> @sbto2f64(<2 x double> %a) {
2553; GENERIC-LABEL: sbto2f64:
2554; GENERIC:       # %bb.0:
2555; GENERIC-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
2556; GENERIC-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
2557; GENERIC-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
2558; GENERIC-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
2559; GENERIC-NEXT:    retq # sched: [1:1.00]
2560;
2561; SKX-LABEL: sbto2f64:
2562; SKX:       # %bb.0:
2563; SKX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2564; SKX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
2565; SKX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
2566; SKX-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
2567; SKX-NEXT:    retq # sched: [7:1.00]
2568  %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
2569  %1 = sitofp <2 x i1> %cmpres to <2 x double>
2570  ret <2 x double> %1
2571}
2572
; Unsigned conversion (uitofp) of <16 x i8> to <16 x float>.
; Both check prefixes expect a vpmovzxbd that places each source byte in its
; own dword of %zmm0 with the remaining bytes zero, followed by one vcvtdq2ps.
2573define <16 x float> @ucto16f32(<16 x i8> %a) {
2574; GENERIC-LABEL: ucto16f32:
2575; GENERIC:       # %bb.0:
2576; GENERIC-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
2577; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
2578; GENERIC-NEXT:    retq # sched: [1:1.00]
2579;
2580; SKX-LABEL: ucto16f32:
2581; SKX:       # %bb.0:
2582; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
2583; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
2584; SKX-NEXT:    retq # sched: [7:1.00]
2585  %b = uitofp <16 x i8> %a to <16 x float>
2586  ret <16 x float>%b
2587}
2588
; Unsigned conversion (uitofp) of <8 x i8> to <8 x double>.
; Both check prefixes expect a vpand with a RIP-relative memory operand, a
; vpmovzxwd widening %xmm0 into %ymm0, and one vcvtdq2pd to %zmm0.
2589define <8 x double> @ucto8f64(<8 x i8> %a) {
2590; GENERIC-LABEL: ucto8f64:
2591; GENERIC:       # %bb.0:
2592; GENERIC-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
2593; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
2594; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
2595; GENERIC-NEXT:    retq # sched: [1:1.00]
2596;
2597; SKX-LABEL: ucto8f64:
2598; SKX:       # %bb.0:
2599; SKX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
2600; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
2601; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2602; SKX-NEXT:    retq # sched: [7:1.00]
2603  %b = uitofp <8 x i8> %a to <8 x double>
2604  ret <8 x double> %b
2605}
2606
; Signed conversion (sitofp) of <16 x i16> to <16 x float>.
; Both check prefixes expect vpmovsxwd from %ymm0 to %zmm0 followed by one
; vcvtdq2ps.
2607define <16 x float> @swto16f32(<16 x i16> %a) {
2608; GENERIC-LABEL: swto16f32:
2609; GENERIC:       # %bb.0:
2610; GENERIC-NEXT:    vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00]
2611; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
2612; GENERIC-NEXT:    retq # sched: [1:1.00]
2613;
2614; SKX-LABEL: swto16f32:
2615; SKX:       # %bb.0:
2616; SKX-NEXT:    vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00]
2617; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
2618; SKX-NEXT:    retq # sched: [7:1.00]
2619  %b = sitofp <16 x i16> %a to <16 x float>
2620  ret <16 x float> %b
2621}
2622
; Signed conversion (sitofp) of <8 x i16> to <8 x double>.
; Both check prefixes expect vpmovsxwd from %xmm0 to %ymm0 followed by one
; vcvtdq2pd from %ymm0 to %zmm0.
2623define <8 x double> @swto8f64(<8 x i16> %a) {
2624; GENERIC-LABEL: swto8f64:
2625; GENERIC:       # %bb.0:
2626; GENERIC-NEXT:    vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
2627; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
2628; GENERIC-NEXT:    retq # sched: [1:1.00]
2629;
2630; SKX-LABEL: swto8f64:
2631; SKX:       # %bb.0:
2632; SKX-NEXT:    vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
2633; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2634; SKX-NEXT:    retq # sched: [7:1.00]
2635  %b = sitofp <8 x i16> %a to <8 x double>
2636  ret <8 x double> %b
2637}
2638
; Signed conversion (sitofp) of <16 x i16> to <16 x double>.
; Both check prefixes expect vpmovsxwd from %ymm0 into %zmm1, then two
; vcvtdq2pd conversions; vextracti64x4 $1 supplies the input of the second one.
2639define <16 x double> @swto16f64(<16 x i16> %a) {
2640; GENERIC-LABEL: swto16f64:
2641; GENERIC:       # %bb.0:
2642; GENERIC-NEXT:    vpmovsxwd %ymm0, %zmm1 # sched: [1:1.00]
2643; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
2644; GENERIC-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
2645; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
2646; GENERIC-NEXT:    retq # sched: [1:1.00]
2647;
2648; SKX-LABEL: swto16f64:
2649; SKX:       # %bb.0:
2650; SKX-NEXT:    vpmovsxwd %ymm0, %zmm1 # sched: [3:1.00]
2651; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
2652; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
2653; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
2654; SKX-NEXT:    retq # sched: [7:1.00]
2655  %b = sitofp <16 x i16> %a to <16 x double>
2656  ret <16 x double> %b
2657}
2658
; Unsigned conversion (uitofp) of <16 x i8> to <16 x double>.
; Both check prefixes expect a vpmovzxbd placing each source byte in its own
; dword of %zmm1, then two vcvtdq2pd conversions; vextracti64x4 $1 supplies the
; input of the second one.
2659define <16 x double> @ucto16f64(<16 x i8> %a) {
2660; GENERIC-LABEL: ucto16f64:
2661; GENERIC:       # %bb.0:
2662; GENERIC-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
2663; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
2664; GENERIC-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
2665; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
2666; GENERIC-NEXT:    retq # sched: [1:1.00]
2667;
2668; SKX-LABEL: ucto16f64:
2669; SKX:       # %bb.0:
2670; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
2671; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
2672; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
2673; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
2674; SKX-NEXT:    retq # sched: [7:1.00]
2675  %b = uitofp <16 x i8> %a to <16 x double>
2676  ret <16 x double> %b
2677}
2678
; Unsigned conversion (uitofp) of <16 x i16> to <16 x float>.
; Both check prefixes expect a vpmovzxwd that pairs each source word of %ymm0
; with a zero word in %zmm0, followed by one vcvtdq2ps.
2679define <16 x float> @uwto16f32(<16 x i16> %a) {
2680; GENERIC-LABEL: uwto16f32:
2681; GENERIC:       # %bb.0:
2682; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
2683; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
2684; GENERIC-NEXT:    retq # sched: [1:1.00]
2685;
2686; SKX-LABEL: uwto16f32:
2687; SKX:       # %bb.0:
2688; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
2689; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
2690; SKX-NEXT:    retq # sched: [7:1.00]
2691  %b = uitofp <16 x i16> %a to <16 x float>
2692  ret <16 x float> %b
2693}
2694
; Unsigned conversion (uitofp) of <8 x i16> to <8 x double>.
; Both check prefixes expect a vpmovzxwd widening %xmm0 into %ymm0 followed by
; one vcvtdq2pd from %ymm0 to %zmm0.
2695define <8 x double> @uwto8f64(<8 x i16> %a) {
2696; GENERIC-LABEL: uwto8f64:
2697; GENERIC:       # %bb.0:
2698; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
2699; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
2700; GENERIC-NEXT:    retq # sched: [1:1.00]
2701;
2702; SKX-LABEL: uwto8f64:
2703; SKX:       # %bb.0:
2704; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
2705; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2706; SKX-NEXT:    retq # sched: [7:1.00]
2707  %b = uitofp <8 x i16> %a to <8 x double>
2708  ret <8 x double> %b
2709}
2710
; Unsigned conversion (uitofp) of <16 x i16> to <16 x double>.
; Both check prefixes expect a vpmovzxwd widening %ymm0 into %zmm1, then two
; vcvtdq2pd conversions; vextracti64x4 $1 supplies the input of the second one.
2711define <16 x double> @uwto16f64(<16 x i16> %a) {
2712; GENERIC-LABEL: uwto16f64:
2713; GENERIC:       # %bb.0:
2714; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
2715; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
2716; GENERIC-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
2717; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
2718; GENERIC-NEXT:    retq # sched: [1:1.00]
2719;
2720; SKX-LABEL: uwto16f64:
2721; SKX:       # %bb.0:
2722; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
2723; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
2724; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
2725; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
2726; SKX-NEXT:    retq # sched: [7:1.00]
2727  %b = uitofp <16 x i16> %a to <16 x double>
2728  ret <16 x double> %b
2729}
2730
; Signed conversion (sitofp) of <16 x i32> to <16 x float>.
; Both check prefixes expect a single vcvtdq2ps on %zmm0 before the return.
2731define <16 x float> @sito16f32(<16 x i32> %a) {
2732; GENERIC-LABEL: sito16f32:
2733; GENERIC:       # %bb.0:
2734; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
2735; GENERIC-NEXT:    retq # sched: [1:1.00]
2736;
2737; SKX-LABEL: sito16f32:
2738; SKX:       # %bb.0:
2739; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
2740; SKX-NEXT:    retq # sched: [7:1.00]
2741  %b = sitofp <16 x i32> %a to <16 x float>
2742  ret <16 x float> %b
2743}
2744
; Signed conversion (sitofp) of <16 x i32> to <16 x double>.
; Both check prefixes expect: vcvtdq2pd of %ymm0 into %zmm2, vextractf64x4 $1
; feeding a second vcvtdq2pd into %zmm1, and a final vmovaps copying %zmm2
; into %zmm0.
2745define <16 x double> @sito16f64(<16 x i32> %a) {
2746; GENERIC-LABEL: sito16f64:
2747; GENERIC:       # %bb.0:
2748; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm2 # sched: [4:1.00]
2749; GENERIC-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00]
2750; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm1 # sched: [4:1.00]
2751; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
2752; GENERIC-NEXT:    retq # sched: [1:1.00]
2753;
2754; SKX-LABEL: sito16f64:
2755; SKX:       # %bb.0:
2756; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm2 # sched: [7:1.00]
2757; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00]
2758; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm1 # sched: [7:1.00]
2759; SKX-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:0.33]
2760; SKX-NEXT:    retq # sched: [7:1.00]
2761  %b = sitofp <16 x i32> %a to <16 x double>
2762  ret <16 x double> %b
2763}
2764
; Unsigned conversion (uitofp) of <16 x i16> to <16 x float>.
; Both check prefixes expect a vpmovzxwd widening %ymm0 into %zmm0 followed by
; one vcvtdq2ps.
; NOTE(review): the IR body and expected instructions match uwto16f32 in this
; file; this looks like a duplicate case - confirm whether that is intended.
2765define <16 x float> @usto16f32(<16 x i16> %a) {
2766; GENERIC-LABEL: usto16f32:
2767; GENERIC:       # %bb.0:
2768; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
2769; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
2770; GENERIC-NEXT:    retq # sched: [1:1.00]
2771;
2772; SKX-LABEL: usto16f32:
2773; SKX:       # %bb.0:
2774; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
2775; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
2776; SKX-NEXT:    retq # sched: [7:1.00]
2777  %b = uitofp <16 x i16> %a to <16 x float>
2778  ret <16 x float> %b
2779}
2780
; Builds a <16 x i1> mask from "icmp slt %a, 0" on <16 x i32> and converts it
; to <16 x float> with uitofp.
; Both check prefixes expect: vpmovd2m into %k0, vpmovm2d back into %zmm0,
; vpsrld $31, and one vcvtdq2ps.
2781define <16 x float> @ubto16f32(<16 x i32> %a) {
2782; GENERIC-LABEL: ubto16f32:
2783; GENERIC:       # %bb.0:
2784; GENERIC-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:0.33]
2785; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
2786; GENERIC-NEXT:    vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
2787; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
2788; GENERIC-NEXT:    retq # sched: [1:1.00]
2789;
2790; SKX-LABEL: ubto16f32:
2791; SKX:       # %bb.0:
2792; SKX-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:1.00]
2793; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
2794; SKX-NEXT:    vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
2795; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
2796; SKX-NEXT:    retq # sched: [7:1.00]
2797  %mask = icmp slt <16 x i32> %a, zeroinitializer
2798  %1 = uitofp <16 x i1> %mask to <16 x float>
2799  ret <16 x float> %1
2800}
2801
; Builds a <16 x i1> mask from "icmp slt %a, 0" on <16 x i32> and converts it
; to <16 x double> with uitofp.
; Both check prefixes expect: vpmovd2m into %k0, vpmovm2d back into %zmm0,
; vpsrld $31 into %zmm1, then two vcvtdq2pd conversions with vextracti64x4 $1
; supplying the input of the second one.
2802define <16 x double> @ubto16f64(<16 x i32> %a) {
2803; GENERIC-LABEL: ubto16f64:
2804; GENERIC:       # %bb.0:
2805; GENERIC-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:0.33]
2806; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
2807; GENERIC-NEXT:    vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00]
2808; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
2809; GENERIC-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
2810; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
2811; GENERIC-NEXT:    retq # sched: [1:1.00]
2812;
2813; SKX-LABEL: ubto16f64:
2814; SKX:       # %bb.0:
2815; SKX-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:1.00]
2816; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
2817; SKX-NEXT:    vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00]
2818; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
2819; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
2820; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
2821; SKX-NEXT:    retq # sched: [7:1.00]
2822  %mask = icmp slt <16 x i32> %a, zeroinitializer
2823  %1 = uitofp <16 x i1> %mask to <16 x double>
2824  ret <16 x double> %1
2825}
2826
; Builds an <8 x i1> mask from "icmp slt %a, 0" on <8 x i32> and converts it
; to <8 x float> with uitofp.
; Both check prefixes expect no conversion instruction at all: vpxor zeroing,
; vpcmpgtd, and a vpandd with a {1to8} broadcast RIP-relative operand.
2827define <8 x float> @ubto8f32(<8 x i32> %a) {
2828; GENERIC-LABEL: ubto8f32:
2829; GENERIC:       # %bb.0:
2830; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2831; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2832; GENERIC-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
2833; GENERIC-NEXT:    retq # sched: [1:1.00]
2834;
2835; SKX-LABEL: ubto8f32:
2836; SKX:       # %bb.0:
2837; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2838; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2839; SKX-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
2840; SKX-NEXT:    retq # sched: [7:1.00]
2841  %mask = icmp slt <8 x i32> %a, zeroinitializer
2842  %1 = uitofp <8 x i1> %mask to <8 x float>
2843  ret <8 x float> %1
2844}
2845
; Builds an <8 x i1> mask from "icmp slt %a, 0" on <8 x i32> and converts it
; to <8 x double> with uitofp.
; Both check prefixes expect: vpxor zeroing, vpcmpgtd, vpsrld $31, and one
; vcvtdq2pd from %ymm0 to %zmm0.
2846define <8 x double> @ubto8f64(<8 x i32> %a) {
2847; GENERIC-LABEL: ubto8f64:
2848; GENERIC:       # %bb.0:
2849; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2850; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2851; GENERIC-NEXT:    vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
2852; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
2853; GENERIC-NEXT:    retq # sched: [1:1.00]
2854;
2855; SKX-LABEL: ubto8f64:
2856; SKX:       # %bb.0:
2857; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2858; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2859; SKX-NEXT:    vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
2860; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2861; SKX-NEXT:    retq # sched: [7:1.00]
2862  %mask = icmp slt <8 x i32> %a, zeroinitializer
2863  %1 = uitofp <8 x i1> %mask to <8 x double>
2864  ret <8 x double> %1
2865}
2866
; Builds a <4 x i1> mask from "icmp slt %a, 0" on <4 x i32> and converts it
; to <4 x float> with uitofp.
; Both check prefixes expect no conversion instruction: vpxor zeroing,
; vpcmpgtd, and a vpandd with a {1to4} broadcast RIP-relative operand.
2867define <4 x float> @ubto4f32(<4 x i32> %a) {
2868; GENERIC-LABEL: ubto4f32:
2869; GENERIC:       # %bb.0:
2870; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2871; GENERIC-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
2872; GENERIC-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50]
2873; GENERIC-NEXT:    retq # sched: [1:1.00]
2874;
2875; SKX-LABEL: ubto4f32:
2876; SKX:       # %bb.0:
2877; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2878; SKX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
2879; SKX-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50]
2880; SKX-NEXT:    retq # sched: [7:1.00]
2881  %mask = icmp slt <4 x i32> %a, zeroinitializer
2882  %1 = uitofp <4 x i1> %mask to <4 x float>
2883  ret <4 x float> %1
2884}
2885
; Builds a <4 x i1> mask from "icmp slt %a, 0" on <4 x i32> and converts it
; to <4 x double> with uitofp.
; Both check prefixes expect: vpxor zeroing, vpcmpgtd, vpsrld $31, and one
; vcvtdq2pd from %xmm0 to %ymm0.
2886define <4 x double> @ubto4f64(<4 x i32> %a) {
2887; GENERIC-LABEL: ubto4f64:
2888; GENERIC:       # %bb.0:
2889; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2890; GENERIC-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
2891; GENERIC-NEXT:    vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
2892; GENERIC-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
2893; GENERIC-NEXT:    retq # sched: [1:1.00]
2894;
2895; SKX-LABEL: ubto4f64:
2896; SKX:       # %bb.0:
2897; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2898; SKX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
2899; SKX-NEXT:    vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
2900; SKX-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
2901; SKX-NEXT:    retq # sched: [7:1.00]
2902  %mask = icmp slt <4 x i32> %a, zeroinitializer
2903  %1 = uitofp <4 x i1> %mask to <4 x double>
2904  ret <4 x double> %1
2905}
2906
; Builds a <2 x i1> mask from "icmp ne %a, 0" on <2 x i32> and converts it to
; <2 x float> with uitofp. Unlike the wider ubto* cases in this file, the
; predicate here is ne rather than slt.
; Both check prefixes expect: vpxor zeroing, vpblendd mixing %xmm0 with the
; zeroed register, vpcmpeqq, vpandn with a RIP-relative operand, and vpshufd
; with element selection [0,2,2,3]; no conversion instruction is expected.
2907define <2 x float> @ubto2f32(<2 x i32> %a) {
2908; GENERIC-LABEL: ubto2f32:
2909; GENERIC:       # %bb.0:
2910; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2911; GENERIC-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
2912; GENERIC-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2913; GENERIC-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
2914; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50]
2915; GENERIC-NEXT:    retq # sched: [1:1.00]
2916;
2917; SKX-LABEL: ubto2f32:
2918; SKX:       # %bb.0:
2919; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2920; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
2921; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2922; SKX-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
2923; SKX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
2924; SKX-NEXT:    retq # sched: [7:1.00]
2925  %mask = icmp ne <2 x i32> %a, zeroinitializer
2926  %1 = uitofp <2 x i1> %mask to <2 x float>
2927  ret <2 x float> %1
2928}
2929
; Builds a <2 x i1> mask from "icmp ne %a, 0" on <2 x i32> and converts it to
; <2 x double> with uitofp.
; Both check prefixes expect: vpxor zeroing, vpblendd mixing %xmm0 with the
; zeroed register, vpcmpeqq, vpandn with a RIP-relative operand, vpshufd with
; element selection [0,2,2,3], and a final vcvtdq2pd on %xmm0.
2930define <2 x double> @ubto2f64(<2 x i32> %a) {
2931; GENERIC-LABEL: ubto2f64:
2932; GENERIC:       # %bb.0:
2933; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2934; GENERIC-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
2935; GENERIC-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2936; GENERIC-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
2937; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50]
2938; GENERIC-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
2939; GENERIC-NEXT:    retq # sched: [1:1.00]
2940;
2941; SKX-LABEL: ubto2f64:
2942; SKX:       # %bb.0:
2943; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2944; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
2945; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2946; SKX-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
2947; SKX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
2948; SKX-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
2949; SKX-NEXT:    retq # sched: [7:1.00]
2950  %mask = icmp ne <2 x i32> %a, zeroinitializer
2951  %1 = uitofp <2 x i1> %mask to <2 x double>
2952  ret <2 x double> %1
2953}
2954
; Loads <8 x i8> from %i (align 1), zero-extends it to <8 x i16>, and selects
; between that value and zero per lane using %mask.
; Both check prefixes expect: vpsllw $15 on the mask vector, vpmovw2m into
; %k1, and one vpmovzxbw from memory using %k1 with the {z} modifier.
2955define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
2956; GENERIC-LABEL: zext_8x8mem_to_8x16:
2957; GENERIC:       # %bb.0:
2958; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
2959; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
2960; GENERIC-NEXT:    vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
2961; GENERIC-NEXT:    retq # sched: [1:1.00]
2962;
2963; SKX-LABEL: zext_8x8mem_to_8x16:
2964; SKX:       # %bb.0:
2965; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
2966; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
2967; SKX-NEXT:    vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
2968; SKX-NEXT:    retq # sched: [7:1.00]
2969  %a   = load <8 x i8>,<8 x i8> *%i,align 1
2970  %x   = zext <8 x i8> %a to <8 x i16>
2971  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
2972  ret <8 x i16> %ret
2973}
2974
; Loads <8 x i8> from %i (align 1), sign-extends it to <8 x i16>, and selects
; between that value and zero per lane using %mask.
; Both check prefixes expect: vpsllw $15 on the mask vector, vpmovw2m into
; %k1, and one vpmovsxbw from (%rdi) using %k1 with the {z} modifier.
2975define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
2976; GENERIC-LABEL: sext_8x8mem_to_8x16:
2977; GENERIC:       # %bb.0:
2978; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
2979; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
2980; GENERIC-NEXT:    vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
2981; GENERIC-NEXT:    retq # sched: [1:1.00]
2982;
2983; SKX-LABEL: sext_8x8mem_to_8x16:
2984; SKX:       # %bb.0:
2985; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
2986; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
2987; SKX-NEXT:    vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
2988; SKX-NEXT:    retq # sched: [7:1.00]
2989  %a   = load <8 x i8>,<8 x i8> *%i,align 1
2990  %x   = sext <8 x i8> %a to <8 x i16>
2991  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
2992  ret <8 x i16> %ret
2993}
2994
2995
; Loads <16 x i8> from %i (align 1), zero-extends it to <16 x i16>, and
; selects between that value and zero per lane using %mask.
; Both check prefixes expect: vpsllw $7 on the mask vector, vpmovb2m into %k1,
; and one vpmovzxbw from memory into %ymm0 using %k1 with the {z} modifier.
2996define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
2997; GENERIC-LABEL: zext_16x8mem_to_16x16:
2998; GENERIC:       # %bb.0:
2999; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
3000; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
3001; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
3002; GENERIC-NEXT:    retq # sched: [1:1.00]
3003;
3004; SKX-LABEL: zext_16x8mem_to_16x16:
3005; SKX:       # %bb.0:
3006; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
3007; SKX-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:1.00]
3008; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
3009; SKX-NEXT:    retq # sched: [7:1.00]
3010  %a   = load <16 x i8>,<16 x i8> *%i,align 1
3011  %x   = zext <16 x i8> %a to <16 x i16>
3012  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
3013  ret <16 x i16> %ret
3014}
3015
; Loads <16 x i8> from %i (align 1), sign-extends it to <16 x i16>, and
; selects between that value and zero per lane using %mask.
; Both check prefixes expect: vpsllw $7 on the mask vector, vpmovb2m into %k1,
; and one vpmovsxbw from (%rdi) into %ymm0 using %k1 with the {z} modifier.
3016define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
3017; GENERIC-LABEL: sext_16x8mem_to_16x16:
3018; GENERIC:       # %bb.0:
3019; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
3020; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
3021; GENERIC-NEXT:    vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
3022; GENERIC-NEXT:    retq # sched: [1:1.00]
3023;
3024; SKX-LABEL: sext_16x8mem_to_16x16:
3025; SKX:       # %bb.0:
3026; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
3027; SKX-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:1.00]
3028; SKX-NEXT:    vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
3029; SKX-NEXT:    retq # sched: [7:1.00]
3030  %a   = load <16 x i8>,<16 x i8> *%i,align 1
3031  %x   = sext <16 x i8> %a to <16 x i16>
3032  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
3033  ret <16 x i16> %ret
3034}
3035
; Zero-extends a <16 x i8> register argument to <16 x i16> (no mask, no load).
; Both check prefixes expect a single vpmovzxbw from %xmm0 to %ymm0.
3036define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
3037; GENERIC-LABEL: zext_16x8_to_16x16:
3038; GENERIC:       # %bb.0:
3039; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
3040; GENERIC-NEXT:    retq # sched: [1:1.00]
3041;
3042; SKX-LABEL: zext_16x8_to_16x16:
3043; SKX:       # %bb.0:
3044; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
3045; SKX-NEXT:    retq # sched: [7:1.00]
3046  %x   = zext <16 x i8> %a to <16 x i16>
3047  ret <16 x i16> %x
3048}
3049
; Zero-extends a <16 x i8> register argument to <16 x i16> and selects between
; that value and zero per lane using %mask.
; Both check prefixes expect: vpsllw $7 on the mask vector in %xmm1, vpmovb2m
; into %k1, and one register-to-register vpmovzxbw using %k1 with the {z}
; modifier.
3050define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
3051; GENERIC-LABEL: zext_16x8_to_16x16_mask:
3052; GENERIC:       # %bb.0:
3053; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
3054; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
3055; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
3056; GENERIC-NEXT:    retq # sched: [1:1.00]
3057;
3058; SKX-LABEL: zext_16x8_to_16x16_mask:
3059; SKX:       # %bb.0:
3060; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
3061; SKX-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:1.00]
3062; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
3063; SKX-NEXT:    retq # sched: [7:1.00]
3064  %x   = zext <16 x i8> %a to <16 x i16>
3065  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
3066  ret <16 x i16> %ret
3067}
3068
; Sign-extends a <16 x i8> register argument to <16 x i16> (no mask, no load).
; Both check prefixes expect a single vpmovsxbw from %xmm0 to %ymm0.
3069define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
3070; GENERIC-LABEL: sext_16x8_to_16x16:
3071; GENERIC:       # %bb.0:
3072; GENERIC-NEXT:    vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
3073; GENERIC-NEXT:    retq # sched: [1:1.00]
3074;
3075; SKX-LABEL: sext_16x8_to_16x16:
3076; SKX:       # %bb.0:
3077; SKX-NEXT:    vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
3078; SKX-NEXT:    retq # sched: [7:1.00]
3079  %x   = sext <16 x i8> %a to <16 x i16>
3080  ret <16 x i16> %x
3081}
3082
; Masked register sign-extension: sext <16 x i8> %a to <16 x i16>, then select against
; zeroinitializer under %mask. Both prefixes check that %mask is moved into %k1
; (vpsllw $7 + vpmovb2m) and consumed by a zero-masking ({%k1} {z}) vpmovsxbw xmm->ymm.
3083define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
3084; GENERIC-LABEL: sext_16x8_to_16x16_mask:
3085; GENERIC:       # %bb.0:
3086; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
3087; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
3088; GENERIC-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [1:1.00]
3089; GENERIC-NEXT:    retq # sched: [1:1.00]
3090;
3091; SKX-LABEL: sext_16x8_to_16x16_mask:
3092; SKX:       # %bb.0:
3093; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
3094; SKX-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:1.00]
3095; SKX-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [3:1.00]
3096; SKX-NEXT:    retq # sched: [7:1.00]
3097  %x   = sext <16 x i8> %a to <16 x i16>
3098  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
3099  ret <16 x i16> %ret
3100}
3101
; Masked memory-source zero-extension: loads <32 x i8> from %i (align 1), zero-extends to
; <32 x i16>, then selects against zeroinitializer under %mask. Both prefixes check that the
; mask reaches %k1 via vpsllw $7 + vpmovb2m and that the zero-masked vpmovzxbw reads from memory.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3102define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
3103; GENERIC-LABEL: zext_32x8mem_to_32x16:
3104; GENERIC:       # %bb.0:
3105; GENERIC-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
3106; GENERIC-NEXT:    vpmovb2m %ymm0, %k1 # sched: [1:0.33]
3107; GENERIC-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [8:1.00]
3108; GENERIC-NEXT:    retq # sched: [1:1.00]
3109;
3110; SKX-LABEL: zext_32x8mem_to_32x16:
3111; SKX:       # %bb.0:
3112; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
3113; SKX-NEXT:    vpmovb2m %ymm0, %k1 # sched: [1:1.00]
3114; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [10:1.00]
3115; SKX-NEXT:    retq # sched: [7:1.00]
3116  %a   = load <32 x i8>,<32 x i8> *%i,align 1
3117  %x   = zext <32 x i8> %a to <32 x i16>
3118  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
3119  ret <32 x i16> %ret
3120}
3121
; Masked memory-source sign-extension: loads <32 x i8> from %i (align 1), sign-extends to
; <32 x i16>, then selects against zeroinitializer under %mask. Both prefixes check that the
; load is folded into a zero-masked vpmovsxbw (%rdi) -> zmm0 with the mask in %k1.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3122define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
3123; GENERIC-LABEL: sext_32x8mem_to_32x16:
3124; GENERIC:       # %bb.0:
3125; GENERIC-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
3126; GENERIC-NEXT:    vpmovb2m %ymm0, %k1 # sched: [1:0.33]
3127; GENERIC-NEXT:    vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
3128; GENERIC-NEXT:    retq # sched: [1:1.00]
3129;
3130; SKX-LABEL: sext_32x8mem_to_32x16:
3131; SKX:       # %bb.0:
3132; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
3133; SKX-NEXT:    vpmovb2m %ymm0, %k1 # sched: [1:1.00]
3134; SKX-NEXT:    vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
3135; SKX-NEXT:    retq # sched: [7:1.00]
3136  %a   = load <32 x i8>,<32 x i8> *%i,align 1
3137  %x   = sext <32 x i8> %a to <32 x i16>
3138  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
3139  ret <32 x i16> %ret
3140}
3141
; Unmasked register zero-extension <32 x i8> -> <32 x i16>.
; Both prefixes check a single vpmovzxbw ymm->zmm followed by retq.
3142define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
3143; GENERIC-LABEL: zext_32x8_to_32x16:
3144; GENERIC:       # %bb.0:
3145; GENERIC-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00]
3146; GENERIC-NEXT:    retq # sched: [1:1.00]
3147;
3148; SKX-LABEL: zext_32x8_to_32x16:
3149; SKX:       # %bb.0:
3150; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00]
3151; SKX-NEXT:    retq # sched: [7:1.00]
3152  %x   = zext <32 x i8> %a to <32 x i16>
3153  ret <32 x i16> %x
3154}
3155
; Masked register zero-extension: zext <32 x i8> %a to <32 x i16>, then select against
; zeroinitializer under %mask. Both prefixes check that %mask is moved into %k1
; (vpsllw $7 + vpmovb2m on ymm1) and consumed by a zero-masking vpmovzxbw ymm->zmm.
3156define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
3157; GENERIC-LABEL: zext_32x8_to_32x16_mask:
3158; GENERIC:       # %bb.0:
3159; GENERIC-NEXT:    vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
3160; GENERIC-NEXT:    vpmovb2m %ymm1, %k1 # sched: [1:0.33]
3161; GENERIC-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00]
3162; GENERIC-NEXT:    retq # sched: [1:1.00]
3163;
3164; SKX-LABEL: zext_32x8_to_32x16_mask:
3165; SKX:       # %bb.0:
3166; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
3167; SKX-NEXT:    vpmovb2m %ymm1, %k1 # sched: [1:1.00]
3168; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00]
3169; SKX-NEXT:    retq # sched: [7:1.00]
3170  %x   = zext <32 x i8> %a to <32 x i16>
3171  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
3172  ret <32 x i16> %ret
3173}
3174
; Unmasked register sign-extension <32 x i8> -> <32 x i16>.
; Both prefixes check a single vpmovsxbw ymm->zmm followed by retq.
3175define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
3176; GENERIC-LABEL: sext_32x8_to_32x16:
3177; GENERIC:       # %bb.0:
3178; GENERIC-NEXT:    vpmovsxbw %ymm0, %zmm0 # sched: [1:1.00]
3179; GENERIC-NEXT:    retq # sched: [1:1.00]
3180;
3181; SKX-LABEL: sext_32x8_to_32x16:
3182; SKX:       # %bb.0:
3183; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0 # sched: [3:1.00]
3184; SKX-NEXT:    retq # sched: [7:1.00]
3185  %x   = sext <32 x i8> %a to <32 x i16>
3186  ret <32 x i16> %x
3187}
3188
; Masked register sign-extension: sext <32 x i8> %a to <32 x i16>, then select against
; zeroinitializer under %mask. Both prefixes check that %mask is moved into %k1
; (vpsllw $7 + vpmovb2m on ymm1) and consumed by a zero-masking vpmovsxbw ymm->zmm.
3189define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
3190; GENERIC-LABEL: sext_32x8_to_32x16_mask:
3191; GENERIC:       # %bb.0:
3192; GENERIC-NEXT:    vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
3193; GENERIC-NEXT:    vpmovb2m %ymm1, %k1 # sched: [1:0.33]
3194; GENERIC-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [1:1.00]
3195; GENERIC-NEXT:    retq # sched: [1:1.00]
3196;
3197; SKX-LABEL: sext_32x8_to_32x16_mask:
3198; SKX:       # %bb.0:
3199; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
3200; SKX-NEXT:    vpmovb2m %ymm1, %k1 # sched: [1:1.00]
3201; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [3:1.00]
3202; SKX-NEXT:    retq # sched: [7:1.00]
3203  %x   = sext <32 x i8> %a to <32 x i16>
3204  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
3205  ret <32 x i16> %ret
3206}
3207
; Masked memory-source zero-extension: loads <4 x i8> from %i (align 1), zero-extends to
; <4 x i32>, then selects against zeroinitializer under %mask. Both prefixes check that the
; mask reaches %k1 via vpslld $31 + vpmovd2m and that the zero-masked vpmovzxbd reads from memory.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3208define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
3209; GENERIC-LABEL: zext_4x8mem_to_4x32:
3210; GENERIC:       # %bb.0:
3211; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
3212; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
3213; GENERIC-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
3214; GENERIC-NEXT:    retq # sched: [1:1.00]
3215;
3216; SKX-LABEL: zext_4x8mem_to_4x32:
3217; SKX:       # %bb.0:
3218; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
3219; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
3220; SKX-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00]
3221; SKX-NEXT:    retq # sched: [7:1.00]
3222  %a   = load <4 x i8>,<4 x i8> *%i,align 1
3223  %x   = zext <4 x i8> %a to <4 x i32>
3224  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
3225  ret <4 x i32> %ret
3226}
3227
; Masked memory-source sign-extension: loads <4 x i8> from %i (align 1), sign-extends to
; <4 x i32>, then selects against zeroinitializer under %mask. Both prefixes check that the
; load is folded into a zero-masked vpmovsxbd (%rdi) -> xmm0 with the mask in %k1.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3228define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
3229; GENERIC-LABEL: sext_4x8mem_to_4x32:
3230; GENERIC:       # %bb.0:
3231; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
3232; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
3233; GENERIC-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
3234; GENERIC-NEXT:    retq # sched: [1:1.00]
3235;
3236; SKX-LABEL: sext_4x8mem_to_4x32:
3237; SKX:       # %bb.0:
3238; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
3239; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
3240; SKX-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
3241; SKX-NEXT:    retq # sched: [7:1.00]
3242  %a   = load <4 x i8>,<4 x i8> *%i,align 1
3243  %x   = sext <4 x i8> %a to <4 x i32>
3244  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
3245  ret <4 x i32> %ret
3246}
3247
; Masked memory-source zero-extension: loads <8 x i8> from %i (align 1), zero-extends to
; <8 x i32>, then selects against zeroinitializer under %mask. Both prefixes check that the
; mask reaches %k1 via vpsllw $15 + vpmovw2m and that the zero-masked vpmovzxbd reads from memory.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3248define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
3249; GENERIC-LABEL: zext_8x8mem_to_8x32:
3250; GENERIC:       # %bb.0:
3251; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
3252; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
3253; GENERIC-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
3254; GENERIC-NEXT:    retq # sched: [1:1.00]
3255;
3256; SKX-LABEL: zext_8x8mem_to_8x32:
3257; SKX:       # %bb.0:
3258; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
3259; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
3260; SKX-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
3261; SKX-NEXT:    retq # sched: [7:1.00]
3262  %a   = load <8 x i8>,<8 x i8> *%i,align 1
3263  %x   = zext <8 x i8> %a to <8 x i32>
3264  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
3265  ret <8 x i32> %ret
3266}
3267
; Masked memory-source sign-extension: loads <8 x i8> from %i (align 1), sign-extends to
; <8 x i32>, then selects against zeroinitializer under %mask. Both prefixes check that the
; load is folded into a zero-masked vpmovsxbd (%rdi) -> ymm0 with the mask in %k1.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3268define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
3269; GENERIC-LABEL: sext_8x8mem_to_8x32:
3270; GENERIC:       # %bb.0:
3271; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
3272; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
3273; GENERIC-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
3274; GENERIC-NEXT:    retq # sched: [1:1.00]
3275;
3276; SKX-LABEL: sext_8x8mem_to_8x32:
3277; SKX:       # %bb.0:
3278; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
3279; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
3280; SKX-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
3281; SKX-NEXT:    retq # sched: [7:1.00]
3282  %a   = load <8 x i8>,<8 x i8> *%i,align 1
3283  %x   = sext <8 x i8> %a to <8 x i32>
3284  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
3285  ret <8 x i32> %ret
3286}
3287
; Masked memory-source zero-extension: loads <16 x i8> from %i (align 1), zero-extends to
; <16 x i32>, then selects against zeroinitializer under %mask. Both prefixes check that the
; mask reaches %k1 via vpsllw $7 + vpmovb2m and that the zero-masked vpmovzxbd reads from memory.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3288define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
3289; GENERIC-LABEL: zext_16x8mem_to_16x32:
3290; GENERIC:       # %bb.0:
3291; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
3292; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
3293; GENERIC-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [8:1.00]
3294; GENERIC-NEXT:    retq # sched: [1:1.00]
3295;
3296; SKX-LABEL: zext_16x8mem_to_16x32:
3297; SKX:       # %bb.0:
3298; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
3299; SKX-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:1.00]
3300; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [10:1.00]
3301; SKX-NEXT:    retq # sched: [7:1.00]
3302  %a   = load <16 x i8>,<16 x i8> *%i,align 1
3303  %x   = zext <16 x i8> %a to <16 x i32>
3304  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
3305  ret <16 x i32> %ret
3306}
3307
; Masked memory-source sign-extension: loads <16 x i8> from %i (align 1), sign-extends to
; <16 x i32>, then selects against zeroinitializer under %mask. Both prefixes check that the
; load is folded into a zero-masked vpmovsxbd (%rdi) -> zmm0 with the mask in %k1.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3308define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
3309; GENERIC-LABEL: sext_16x8mem_to_16x32:
3310; GENERIC:       # %bb.0:
3311; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
3312; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
3313; GENERIC-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
3314; GENERIC-NEXT:    retq # sched: [1:1.00]
3315;
3316; SKX-LABEL: sext_16x8mem_to_16x32:
3317; SKX:       # %bb.0:
3318; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
3319; SKX-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:1.00]
3320; SKX-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
3321; SKX-NEXT:    retq # sched: [7:1.00]
3322  %a   = load <16 x i8>,<16 x i8> *%i,align 1
3323  %x   = sext <16 x i8> %a to <16 x i32>
3324  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
3325  ret <16 x i32> %ret
3326}
3327
; Masked register zero-extension: zext <16 x i8> %a to <16 x i32>, then select against
; zeroinitializer under %mask. Both prefixes check that %mask is moved into %k1
; (vpsllw $7 + vpmovb2m) and consumed by a zero-masking vpmovzxbd xmm->zmm.
3328define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
3329; GENERIC-LABEL: zext_16x8_to_16x32_mask:
3330; GENERIC:       # %bb.0:
3331; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
3332; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
3333; GENERIC-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
3334; GENERIC-NEXT:    retq # sched: [1:1.00]
3335;
3336; SKX-LABEL: zext_16x8_to_16x32_mask:
3337; SKX:       # %bb.0:
3338; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
3339; SKX-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:1.00]
3340; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
3341; SKX-NEXT:    retq # sched: [7:1.00]
3342  %x   = zext <16 x i8> %a to <16 x i32>
3343  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
3344  ret <16 x i32> %ret
3345}
3346
; Masked register sign-extension: sext <16 x i8> %a to <16 x i32>, then select against
; zeroinitializer under %mask. Both prefixes check that %mask is moved into %k1
; (vpsllw $7 + vpmovb2m) and consumed by a zero-masking vpmovsxbd xmm->zmm.
3347define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
3348; GENERIC-LABEL: sext_16x8_to_16x32_mask:
3349; GENERIC:       # %bb.0:
3350; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
3351; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
3352; GENERIC-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
3353; GENERIC-NEXT:    retq # sched: [1:1.00]
3354;
3355; SKX-LABEL: sext_16x8_to_16x32_mask:
3356; SKX:       # %bb.0:
3357; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
3358; SKX-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:1.00]
3359; SKX-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
3360; SKX-NEXT:    retq # sched: [7:1.00]
3361  %x   = sext <16 x i8> %a to <16 x i32>
3362  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
3363  ret <16 x i32> %ret
3364}
3365
; Unmasked register zero-extension <16 x i8> -> <16 x i32>.
; Both prefixes check a single vpmovzxbd xmm->zmm followed by retq.
3366define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
3367; GENERIC-LABEL: zext_16x8_to_16x32:
3368; GENERIC:       # %bb.0:
3369; GENERIC-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
3370; GENERIC-NEXT:    retq # sched: [1:1.00]
3371;
3372; SKX-LABEL: zext_16x8_to_16x32:
3373; SKX:       # %bb.0:
3374; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
3375; SKX-NEXT:    retq # sched: [7:1.00]
3376  %x = zext <16 x i8> %i to <16 x i32>
3377  ret <16 x i32> %x
3378}
3379
; Unmasked register sign-extension <16 x i8> -> <16 x i32>.
; Both prefixes check a single vpmovsxbd xmm->zmm followed by retq.
3380define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
3381; GENERIC-LABEL: sext_16x8_to_16x32:
3382; GENERIC:       # %bb.0:
3383; GENERIC-NEXT:    vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00]
3384; GENERIC-NEXT:    retq # sched: [1:1.00]
3385;
3386; SKX-LABEL: sext_16x8_to_16x32:
3387; SKX:       # %bb.0:
3388; SKX-NEXT:    vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00]
3389; SKX-NEXT:    retq # sched: [7:1.00]
3390  %x = sext <16 x i8> %i to <16 x i32>
3391  ret <16 x i32> %x
3392}
3393
; Masked memory-source zero-extension: loads <2 x i8> from %i (align 1), zero-extends to
; <2 x i64>, then selects against zeroinitializer under %mask. Both prefixes check that the
; mask reaches %k1 via vpsllq $63 + vpmovq2m and that the zero-masked vpmovzxbq reads from memory.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3394define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
3395; GENERIC-LABEL: zext_2x8mem_to_2x64:
3396; GENERIC:       # %bb.0:
3397; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
3398; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
3399; GENERIC-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
3400; GENERIC-NEXT:    retq # sched: [1:1.00]
3401;
3402; SKX-LABEL: zext_2x8mem_to_2x64:
3403; SKX:       # %bb.0:
3404; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
3405; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
3406; SKX-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00]
3407; SKX-NEXT:    retq # sched: [7:1.00]
3408  %a   = load <2 x i8>,<2 x i8> *%i,align 1
3409  %x   = zext <2 x i8> %a to <2 x i64>
3410  %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
3411  ret <2 x i64> %ret
3412}
; Masked memory-source sign-extension: loads <2 x i8> from %i (align 1), sign-extends to
; <2 x i64>, then selects against zeroinitializer under %mask. Both prefixes check that the
; load is folded into a zero-masked vpmovsxbq (%rdi) -> xmm0 with the mask in %k1.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3413define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
3414; GENERIC-LABEL: sext_2x8mem_to_2x64mask:
3415; GENERIC:       # %bb.0:
3416; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
3417; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
3418; GENERIC-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
3419; GENERIC-NEXT:    retq # sched: [1:1.00]
3420;
3421; SKX-LABEL: sext_2x8mem_to_2x64mask:
3422; SKX:       # %bb.0:
3423; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
3424; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
3425; SKX-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
3426; SKX-NEXT:    retq # sched: [7:1.00]
3427  %a   = load <2 x i8>,<2 x i8> *%i,align 1
3428  %x   = sext <2 x i8> %a to <2 x i64>
3429  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
3430  ret <2 x i64> %ret
3431}
; Unmasked memory-source sign-extension: loads <2 x i8> from %i (align 1) and sign-extends
; to <2 x i64>. Both prefixes check that the load is folded into vpmovsxbq (%rdi) -> xmm0.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3432define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
3433; GENERIC-LABEL: sext_2x8mem_to_2x64:
3434; GENERIC:       # %bb.0:
3435; GENERIC-NEXT:    vpmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
3436; GENERIC-NEXT:    retq # sched: [1:1.00]
3437;
3438; SKX-LABEL: sext_2x8mem_to_2x64:
3439; SKX:       # %bb.0:
3440; SKX-NEXT:    vpmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
3441; SKX-NEXT:    retq # sched: [7:1.00]
3442  %a   = load <2 x i8>,<2 x i8> *%i,align 1
3443  %x   = sext <2 x i8> %a to <2 x i64>
3444  ret <2 x i64> %x
3445}
3446
; Masked memory-source zero-extension: loads <4 x i8> from %i (align 1), zero-extends to
; <4 x i64>, then selects against zeroinitializer under %mask. Both prefixes check that the
; mask reaches %k1 via vpslld $31 + vpmovd2m and that the zero-masked vpmovzxbq reads from memory.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3447define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
3448; GENERIC-LABEL: zext_4x8mem_to_4x64:
3449; GENERIC:       # %bb.0:
3450; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
3451; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
3452; GENERIC-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
3453; GENERIC-NEXT:    retq # sched: [1:1.00]
3454;
3455; SKX-LABEL: zext_4x8mem_to_4x64:
3456; SKX:       # %bb.0:
3457; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
3458; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
3459; SKX-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
3460; SKX-NEXT:    retq # sched: [7:1.00]
3461  %a   = load <4 x i8>,<4 x i8> *%i,align 1
3462  %x   = zext <4 x i8> %a to <4 x i64>
3463  %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
3464  ret <4 x i64> %ret
3465}
3466
; Masked memory-source sign-extension: loads <4 x i8> from %i (align 1), sign-extends to
; <4 x i64>, then selects against zeroinitializer under %mask. Both prefixes check that the
; load is folded into a zero-masked vpmovsxbq (%rdi) -> ymm0 with the mask in %k1.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3467define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
3468; GENERIC-LABEL: sext_4x8mem_to_4x64mask:
3469; GENERIC:       # %bb.0:
3470; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
3471; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
3472; GENERIC-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
3473; GENERIC-NEXT:    retq # sched: [1:1.00]
3474;
3475; SKX-LABEL: sext_4x8mem_to_4x64mask:
3476; SKX:       # %bb.0:
3477; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
3478; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
3479; SKX-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
3480; SKX-NEXT:    retq # sched: [7:1.00]
3481  %a   = load <4 x i8>,<4 x i8> *%i,align 1
3482  %x   = sext <4 x i8> %a to <4 x i64>
3483  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
3484  ret <4 x i64> %ret
3485}
3486
; Unmasked memory-source sign-extension: loads <4 x i8> from %i (align 1) and sign-extends
; to <4 x i64>. Both prefixes check that the load is folded into vpmovsxbq (%rdi) -> ymm0.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3487define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
3488; GENERIC-LABEL: sext_4x8mem_to_4x64:
3489; GENERIC:       # %bb.0:
3490; GENERIC-NEXT:    vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00]
3491; GENERIC-NEXT:    retq # sched: [1:1.00]
3492;
3493; SKX-LABEL: sext_4x8mem_to_4x64:
3494; SKX:       # %bb.0:
3495; SKX-NEXT:    vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00]
3496; SKX-NEXT:    retq # sched: [7:1.00]
3497  %a   = load <4 x i8>,<4 x i8> *%i,align 1
3498  %x   = sext <4 x i8> %a to <4 x i64>
3499  ret <4 x i64> %x
3500}
3501
; Masked memory-source zero-extension: loads <8 x i8> from %i (align 1), zero-extends to
; <8 x i64>, then selects against zeroinitializer under %mask. Both prefixes check that the
; mask reaches %k1 via vpsllw $15 + vpmovw2m and that the zero-masked vpmovzxbq reads from memory.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3502define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
3503; GENERIC-LABEL: zext_8x8mem_to_8x64:
3504; GENERIC:       # %bb.0:
3505; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
3506; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
3507; GENERIC-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
3508; GENERIC-NEXT:    retq # sched: [1:1.00]
3509;
3510; SKX-LABEL: zext_8x8mem_to_8x64:
3511; SKX:       # %bb.0:
3512; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
3513; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
3514; SKX-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
3515; SKX-NEXT:    retq # sched: [7:1.00]
3516  %a   = load <8 x i8>,<8 x i8> *%i,align 1
3517  %x   = zext <8 x i8> %a to <8 x i64>
3518  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
3519  ret <8 x i64> %ret
3520}
3521
; Masked memory-source sign-extension: loads <8 x i8> from %i (align 1), sign-extends to
; <8 x i64>, then selects against zeroinitializer under %mask. Both prefixes check that the
; load is folded into a zero-masked vpmovsxbq (%rdi) -> zmm0 with the mask in %k1.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3522define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
3523; GENERIC-LABEL: sext_8x8mem_to_8x64mask:
3524; GENERIC:       # %bb.0:
3525; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
3526; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
3527; GENERIC-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
3528; GENERIC-NEXT:    retq # sched: [1:1.00]
3529;
3530; SKX-LABEL: sext_8x8mem_to_8x64mask:
3531; SKX:       # %bb.0:
3532; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
3533; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
3534; SKX-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
3535; SKX-NEXT:    retq # sched: [7:1.00]
3536  %a   = load <8 x i8>,<8 x i8> *%i,align 1
3537  %x   = sext <8 x i8> %a to <8 x i64>
3538  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
3539  ret <8 x i64> %ret
3540}
3541
; Unmasked memory-source sign-extension: loads <8 x i8> from %i (align 1) and sign-extends
; to <8 x i64>. Both prefixes check that the load is folded into vpmovsxbq (%rdi) -> zmm0.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3542define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
3543; GENERIC-LABEL: sext_8x8mem_to_8x64:
3544; GENERIC:       # %bb.0:
3545; GENERIC-NEXT:    vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00]
3546; GENERIC-NEXT:    retq # sched: [1:1.00]
3547;
3548; SKX-LABEL: sext_8x8mem_to_8x64:
3549; SKX:       # %bb.0:
3550; SKX-NEXT:    vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00]
3551; SKX-NEXT:    retq # sched: [7:1.00]
3552  %a   = load <8 x i8>,<8 x i8> *%i,align 1
3553  %x   = sext <8 x i8> %a to <8 x i64>
3554  ret <8 x i64> %x
3555}
3556
; Masked memory-source zero-extension: loads <4 x i16> from %i (align 1), zero-extends to
; <4 x i32>, then selects against zeroinitializer under %mask. Both prefixes check that the
; mask reaches %k1 via vpslld $31 + vpmovd2m and that the zero-masked vpmovzxwd reads from memory.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3557define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
3558; GENERIC-LABEL: zext_4x16mem_to_4x32:
3559; GENERIC:       # %bb.0:
3560; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
3561; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
3562; GENERIC-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00]
3563; GENERIC-NEXT:    retq # sched: [1:1.00]
3564;
3565; SKX-LABEL: zext_4x16mem_to_4x32:
3566; SKX:       # %bb.0:
3567; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
3568; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
3569; SKX-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00]
3570; SKX-NEXT:    retq # sched: [7:1.00]
3571  %a   = load <4 x i16>,<4 x i16> *%i,align 1
3572  %x   = zext <4 x i16> %a to <4 x i32>
3573  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
3574  ret <4 x i32> %ret
3575}
3576
; Masked memory-source sign-extension: loads <4 x i16> from %i (align 1), sign-extends to
; <4 x i32>, then selects against zeroinitializer under %mask. Both prefixes check that the
; load is folded into a zero-masked vpmovsxwd (%rdi) -> xmm0 with the mask in %k1.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3577define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
3578; GENERIC-LABEL: sext_4x16mem_to_4x32mask:
3579; GENERIC:       # %bb.0:
3580; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
3581; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
3582; GENERIC-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
3583; GENERIC-NEXT:    retq # sched: [1:1.00]
3584;
3585; SKX-LABEL: sext_4x16mem_to_4x32mask:
3586; SKX:       # %bb.0:
3587; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
3588; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
3589; SKX-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
3590; SKX-NEXT:    retq # sched: [7:1.00]
3591  %a   = load <4 x i16>,<4 x i16> *%i,align 1
3592  %x   = sext <4 x i16> %a to <4 x i32>
3593  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
3594  ret <4 x i32> %ret
3595}
3596
; Unmasked memory-source sign-extension: loads <4 x i16> from %i (align 1) and sign-extends
; to <4 x i32>. Both prefixes check that the load is folded into vpmovsxwd (%rdi) -> xmm0.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3597define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
3598; GENERIC-LABEL: sext_4x16mem_to_4x32:
3599; GENERIC:       # %bb.0:
3600; GENERIC-NEXT:    vpmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
3601; GENERIC-NEXT:    retq # sched: [1:1.00]
3602;
3603; SKX-LABEL: sext_4x16mem_to_4x32:
3604; SKX:       # %bb.0:
3605; SKX-NEXT:    vpmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
3606; SKX-NEXT:    retq # sched: [7:1.00]
3607  %a   = load <4 x i16>,<4 x i16> *%i,align 1
3608  %x   = sext <4 x i16> %a to <4 x i32>
3609  ret <4 x i32> %x
3610}
3611
3612
; Masked memory-source zero-extension: loads <8 x i16> from %i (align 1), zero-extends to
; <8 x i32>, then selects against zeroinitializer under %mask. Both prefixes check that the
; mask reaches %k1 via vpsllw $15 + vpmovw2m and that the zero-masked vpmovzxwd reads from memory.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3613define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
3614; GENERIC-LABEL: zext_8x16mem_to_8x32:
3615; GENERIC:       # %bb.0:
3616; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
3617; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
3618; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
3619; GENERIC-NEXT:    retq # sched: [1:1.00]
3620;
3621; SKX-LABEL: zext_8x16mem_to_8x32:
3622; SKX:       # %bb.0:
3623; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
3624; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
3625; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00]
3626; SKX-NEXT:    retq # sched: [7:1.00]
3627  %a   = load <8 x i16>,<8 x i16> *%i,align 1
3628  %x   = zext <8 x i16> %a to <8 x i32>
3629  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
3630  ret <8 x i32> %ret
3631}
3632
; Masked memory-source sign-extension: loads <8 x i16> from %i (align 1), sign-extends to
; <8 x i32>, then selects against zeroinitializer under %mask. Both prefixes check that the
; load is folded into a zero-masked vpmovsxwd (%rdi) -> ymm0 with the mask in %k1.
; NOTE(review): declared readnone although the body loads through %i - confirm this is intended.
3633define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
3634; GENERIC-LABEL: sext_8x16mem_to_8x32mask:
3635; GENERIC:       # %bb.0:
3636; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
3637; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
3638; GENERIC-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
3639; GENERIC-NEXT:    retq # sched: [1:1.00]
3640;
3641; SKX-LABEL: sext_8x16mem_to_8x32mask:
3642; SKX:       # %bb.0:
3643; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
3644; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
3645; SKX-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
3646; SKX-NEXT:    retq # sched: [7:1.00]
3647  %a   = load <8 x i16>,<8 x i16> *%i,align 1
3648  %x   = sext <8 x i16> %a to <8 x i32>
3649  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
3650  ret <8 x i32> %ret
3651}
3652
; Sign-extending load <8 x i16> -> <8 x i32>.
; Marked readonly rather than readnone because the body loads through %i.
define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16>* %i) nounwind readonly {
; GENERIC-LABEL: sext_8x16mem_to_8x32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovsxwd (%rdi), %ymm0 # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x16mem_to_8x32:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxwd (%rdi), %ymm0 # sched: [9:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <8 x i16>, <8 x i16>* %i, align 1
  %x = sext <8 x i16> %a to <8 x i32>
  ret <8 x i32> %x
}
3667
; Register form: zero-extend <8 x i16> to <8 x i32>, then zero every lane whose %mask bit is clear.
define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a, <8 x i1> %mask) nounwind readnone {
; GENERIC-LABEL: zext_8x16_to_8x32mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovw2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x16_to_8x32mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50]
; SKX-NEXT:    vpmovw2m %xmm1, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %wide = zext <8 x i16> %a to <8 x i32>
  %masked = select <8 x i1> %mask, <8 x i32> %wide, <8 x i32> zeroinitializer
  ret <8 x i32> %masked
}
3686
; Register form: plain zero-extension of <8 x i16> to <8 x i32>.
define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a) nounwind readnone {
; GENERIC-LABEL: zext_8x16_to_8x32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x16_to_8x32:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %wide = zext <8 x i16> %a to <8 x i32>
  ret <8 x i32> %wide
}
3700
; Masked zero-extending load <16 x i16> -> <16 x i32>; lanes with a clear %mask bit are zero.
; Marked readonly rather than readnone because the body loads through %i.
define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16>* %i, <16 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: zext_16x16mem_to_16x32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16x16mem_to_16x32:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <16 x i16>, <16 x i16>* %i, align 1
  %x = zext <16 x i16> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}
3720
; Masked sign-extending load <16 x i16> -> <16 x i32>; lanes with a clear %mask bit are zero.
; Marked readonly rather than readnone because the body loads through %i.
define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16>* %i, <16 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: sext_16x16mem_to_16x32mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_16x16mem_to_16x32mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <16 x i16>, <16 x i16>* %i, align 1
  %x = sext <16 x i16> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}
3740
; Sign-extending load <16 x i16> -> <16 x i32>.
; Marked readonly rather than readnone because the body loads through %i.
define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16>* %i) nounwind readonly {
; GENERIC-LABEL: sext_16x16mem_to_16x32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovsxwd (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_16x16mem_to_16x32:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxwd (%rdi), %zmm0 # sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <16 x i16>, <16 x i16>* %i, align 1
  %x = sext <16 x i16> %a to <16 x i32>
  ret <16 x i32> %x
}
; Register form: zero-extend <16 x i16> to <16 x i32>, then zero every lane whose %mask bit is clear.
define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a, <16 x i1> %mask) nounwind readnone {
; GENERIC-LABEL: zext_16x16_to_16x32mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16x16_to_16x32mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
; SKX-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %wide = zext <16 x i16> %a to <16 x i32>
  %masked = select <16 x i1> %mask, <16 x i32> %wide, <16 x i32> zeroinitializer
  ret <16 x i32> %masked
}
3773
; Register form: plain zero-extension of <16 x i16> to <16 x i32>.
define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a) nounwind readnone {
; GENERIC-LABEL: zext_16x16_to_16x32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16x16_to_16x32:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %wide = zext <16 x i16> %a to <16 x i32>
  ret <16 x i32> %wide
}
3787
; Masked zero-extending load <2 x i16> -> <2 x i64>; lanes with a clear %mask bit are zero.
; Marked readonly rather than readnone because the body loads through %i.
define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16>* %i, <2 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: zext_2x16mem_to_2x64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_2x16mem_to_2x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [9:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <2 x i16>, <2 x i16>* %i, align 1
  %x = zext <2 x i16> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
3807
; Masked sign-extending load <2 x i16> -> <2 x i64>; lanes with a clear %mask bit are zero.
; Marked readonly rather than readnone because the body loads through %i.
define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16>* %i, <2 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: sext_2x16mem_to_2x64mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_2x16mem_to_2x64mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <2 x i16>, <2 x i16>* %i, align 1
  %x = sext <2 x i16> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
3827
; Sign-extending load <2 x i16> -> <2 x i64>.
; Marked readonly rather than readnone because the body loads through %i.
define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16>* %i) nounwind readonly {
; GENERIC-LABEL: sext_2x16mem_to_2x64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_2x16mem_to_2x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <2 x i16>, <2 x i16>* %i, align 1
  %x = sext <2 x i16> %a to <2 x i64>
  ret <2 x i64> %x
}
3842
; Masked zero-extending load <4 x i16> -> <4 x i64>; lanes with a clear %mask bit are zero.
; Marked readonly rather than readnone because the body loads through %i.
define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16>* %i, <4 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: zext_4x16mem_to_4x64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4x16mem_to_4x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <4 x i16>, <4 x i16>* %i, align 1
  %x = zext <4 x i16> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
3862
; Masked sign-extending load <4 x i16> -> <4 x i64>; lanes with a clear %mask bit are zero.
; Marked readonly rather than readnone because the body loads through %i.
define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16>* %i, <4 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: sext_4x16mem_to_4x64mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x16mem_to_4x64mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <4 x i16>, <4 x i16>* %i, align 1
  %x = sext <4 x i16> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
3882
; Sign-extending load <4 x i16> -> <4 x i64>.
; Marked readonly rather than readnone because the body loads through %i.
define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16>* %i) nounwind readonly {
; GENERIC-LABEL: sext_4x16mem_to_4x64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x16mem_to_4x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <4 x i16>, <4 x i16>* %i, align 1
  %x = sext <4 x i16> %a to <4 x i64>
  ret <4 x i64> %x
}
3897
; Masked zero-extending load <8 x i16> -> <8 x i64>; lanes with a clear %mask bit are zero.
; Marked readonly rather than readnone because the body loads through %i.
define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16>* %i, <8 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: zext_8x16mem_to_8x64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x16mem_to_8x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <8 x i16>, <8 x i16>* %i, align 1
  %x = zext <8 x i16> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
3917
; Masked sign-extending load <8 x i16> -> <8 x i64>; lanes with a clear %mask bit are zero.
; Marked readonly rather than readnone because the body loads through %i.
define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16>* %i, <8 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: sext_8x16mem_to_8x64mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x16mem_to_8x64mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <8 x i16>, <8 x i16>* %i, align 1
  %x = sext <8 x i16> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
3937
; Sign-extending load <8 x i16> -> <8 x i64>.
; Marked readonly rather than readnone because the body loads through %i.
define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16>* %i) nounwind readonly {
; GENERIC-LABEL: sext_8x16mem_to_8x64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovsxwq (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x16mem_to_8x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxwq (%rdi), %zmm0 # sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <8 x i16>, <8 x i16>* %i, align 1
  %x = sext <8 x i16> %a to <8 x i64>
  ret <8 x i64> %x
}
3952
; Register form: zero-extend <8 x i16> to <8 x i64>, then zero every lane whose %mask bit is clear.
define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a, <8 x i1> %mask) nounwind readnone {
; GENERIC-LABEL: zext_8x16_to_8x64mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovw2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x16_to_8x64mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50]
; SKX-NEXT:    vpmovw2m %xmm1, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %wide = zext <8 x i16> %a to <8 x i64>
  %masked = select <8 x i1> %mask, <8 x i64> %wide, <8 x i64> zeroinitializer
  ret <8 x i64> %masked
}
3971
; Register form: plain zero-extension of <8 x i16> to <8 x i64>.
define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
; GENERIC-LABEL: zext_8x16_to_8x64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x16_to_8x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %wide = zext <8 x i16> %a to <8 x i64>
  ret <8 x i64> %wide
}
3985
; Masked zero-extending load <2 x i32> -> <2 x i64>; lanes with a clear %mask bit are zero.
; Marked readonly rather than readnone because the body loads through %i.
define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32>* %i, <2 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: zext_2x32mem_to_2x64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_2x32mem_to_2x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [9:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <2 x i32>, <2 x i32>* %i, align 1
  %x = zext <2 x i32> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
4005
; Masked sign-extending load <2 x i32> -> <2 x i64>; lanes with a clear %mask bit are zero.
; Marked readonly rather than readnone because the body loads through %i.
define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32>* %i, <2 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: sext_2x32mem_to_2x64mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_2x32mem_to_2x64mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <2 x i32>, <2 x i32>* %i, align 1
  %x = sext <2 x i32> %a to <2 x i64>
  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
  ret <2 x i64> %ret
}
4025
; Sign-extending load <2 x i32> -> <2 x i64>.
; Marked readonly rather than readnone because the body loads through %i.
define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32>* %i) nounwind readonly {
; GENERIC-LABEL: sext_2x32mem_to_2x64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_2x32mem_to_2x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <2 x i32>, <2 x i32>* %i, align 1
  %x = sext <2 x i32> %a to <2 x i64>
  ret <2 x i64> %x
}
4040
; Masked zero-extending load <4 x i32> -> <4 x i64>; lanes with a clear %mask bit are zero.
; Marked readonly rather than readnone because the body loads through %i.
define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32>* %i, <4 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: zext_4x32mem_to_4x64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4x32mem_to_4x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <4 x i32>, <4 x i32>* %i, align 1
  %x = zext <4 x i32> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
4060
; Masked sign-extending load <4 x i32> -> <4 x i64>; lanes with a clear %mask bit are zero.
; Marked readonly rather than readnone because the body loads through %i.
define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32>* %i, <4 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: sext_4x32mem_to_4x64mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x32mem_to_4x64mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <4 x i32>, <4 x i32>* %i, align 1
  %x = sext <4 x i32> %a to <4 x i64>
  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
  ret <4 x i64> %ret
}
4080
; Sign-extending load <4 x i32> -> <4 x i64>.
; Marked readonly rather than readnone because the body loads through %i.
define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32>* %i) nounwind readonly {
; GENERIC-LABEL: sext_4x32mem_to_4x64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovsxdq (%rdi), %ymm0 # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x32mem_to_4x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxdq (%rdi), %ymm0 # sched: [9:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <4 x i32>, <4 x i32>* %i, align 1
  %x = sext <4 x i32> %a to <4 x i64>
  ret <4 x i64> %x
}
4095
; Register form: plain sign-extension of <4 x i32> to <4 x i64>.
define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
; GENERIC-LABEL: sext_4x32_to_4x64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x32_to_4x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %wide = sext <4 x i32> %a to <4 x i64>
  ret <4 x i64> %wide
}
4109
; Register form: zero-extend <4 x i32> to <4 x i64>, then zero every lane whose %mask bit is clear.
define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a, <4 x i1> %mask) nounwind readnone {
; GENERIC-LABEL: zext_4x32_to_4x64mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovd2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4x32_to_4x64mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm1 # sched: [1:0.50]
; SKX-NEXT:    vpmovd2m %xmm1, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %wide = zext <4 x i32> %a to <4 x i64>
  %masked = select <4 x i1> %mask, <4 x i64> %wide, <4 x i64> zeroinitializer
  ret <4 x i64> %masked
}
4128
; Masked zero-extending load <8 x i32> -> <8 x i64>; lanes with a clear %mask bit are zero.
; Marked readonly rather than readnone because the body loads through %i.
define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32>* %i, <8 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: zext_8x32mem_to_8x64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x32mem_to_8x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <8 x i32>, <8 x i32>* %i, align 1
  %x = zext <8 x i32> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
4148
; Masked sign-extending load <8 x i32> -> <8 x i64>; lanes with a clear %mask bit are zero.
; Marked readonly rather than readnone because the body loads through %i.
define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32>* %i, <8 x i1> %mask) nounwind readonly {
; GENERIC-LABEL: sext_8x32mem_to_8x64mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x32mem_to_8x64mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <8 x i32>, <8 x i32>* %i, align 1
  %x = sext <8 x i32> %a to <8 x i64>
  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
  ret <8 x i64> %ret
}
4168
; Sign-extending load <8 x i32> -> <8 x i64>.
; Marked readonly rather than readnone because the body loads through %i.
define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32>* %i) nounwind readonly {
; GENERIC-LABEL: sext_8x32mem_to_8x64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovsxdq (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x32mem_to_8x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxdq (%rdi), %zmm0 # sched: [10:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = load <8 x i32>, <8 x i32>* %i, align 1
  %x = sext <8 x i32> %a to <8 x i64>
  ret <8 x i64> %x
}
4183
; Register form: plain sign-extension of <8 x i32> to <8 x i64>.
define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
; GENERIC-LABEL: sext_8x32_to_8x64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpmovsxdq %ymm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x32_to_8x64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxdq %ymm0, %zmm0 # sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %wide = sext <8 x i32> %a to <8 x i64>
  ret <8 x i64> %wide
}
4197
; Register form: zero-extend <8 x i32> to <8 x i64>, then zero every lane whose %mask bit is clear.
define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a, <8 x i1> %mask) nounwind readnone {
; GENERIC-LABEL: zext_8x32_to_8x64mask:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovw2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x32_to_8x64mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50]
; SKX-NEXT:    vpmovw2m %xmm1, %k1 # sched: [1:1.00]
; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [3:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %wide = zext <8 x i32> %a to <8 x i64>
  %masked = select <8 x i1> %mask, <8 x i64> %wide, <8 x i64> zeroinitializer
  ret <8 x i64> %masked
}
; Narrows <8 x double> to <8 x float> with fptrunc.
define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
; GENERIC-LABEL: fptrunc_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: fptrunc_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %narrow = fptrunc <8 x double> %a to <8 x float>
  ret <8 x float> %narrow
}
4229
; Widens <8 x float> to <8 x double> with fpext.
define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
; GENERIC-LABEL: fpext_test:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: fpext_test:
; SKX:       # %bb.0:
; SKX-NEXT:    vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %wide = fpext <8 x float> %a to <8 x double>
  ret <8 x double> %wide
}
4243
; Reinterprets the i16 argument as sixteen i1 lanes and zero-extends each lane to i32.
define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
; GENERIC-LABEL: zext_16i1_to_16xi32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT:    vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16i1_to_16xi32:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT:    vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %bits = bitcast i16 %b to <16 x i1>
  %wide = zext <16 x i1> %bits to <16 x i32>
  ret <16 x i32> %wide
}
4262
; Reinterprets the i8 argument as eight i1 lanes and zero-extends each lane to i64.
define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
; GENERIC-LABEL: zext_8i1_to_8xi64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT:    vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8i1_to_8xi64:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
; SKX-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT:    vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %bits = bitcast i8 %b to <8 x i1>
  %wide = zext <8 x i1> %bits to <8 x i64>
  ret <8 x i64> %wide
}
4281
; Truncates each i8 lane to i1 and returns the sixteen resulting bits packed into an i16.
define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
; GENERIC-LABEL: trunc_16i8_to_16i1:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: trunc_16i8_to_16i1:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:1.00]
; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
; SKX-NEXT:    retq # sched: [7:1.00]
  %bits = trunc <16 x i8> %a to <16 x i1>
  %packed = bitcast <16 x i1> %bits to i16
  ret i16 %packed
}
4302
; Truncates <16 x i32> to <16 x i1> and returns the mask as an i16.
; Expected sequence: vpslld $31, vpmovd2m into %k0, kmovd to %eax, and a
; vzeroupper before the return.
4303define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
4304; GENERIC-LABEL: trunc_16i32_to_16i1:
4305; GENERIC:       # %bb.0:
4306; GENERIC-NEXT:    vpslld $31, %zmm0, %zmm0 # sched: [1:1.00]
4307; GENERIC-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:0.33]
4308; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
4309; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
4310; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
4311; GENERIC-NEXT:    retq # sched: [1:1.00]
4312;
4313; SKX-LABEL: trunc_16i32_to_16i1:
4314; SKX:       # %bb.0:
4315; SKX-NEXT:    vpslld $31, %zmm0, %zmm0 # sched: [1:1.00]
4316; SKX-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:1.00]
4317; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
4318; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
4319; SKX-NEXT:    vzeroupper # sched: [4:1.00]
4320; SKX-NEXT:    retq # sched: [7:1.00]
4321  %mask_b = trunc <16 x i32>%a to <16 x i1>
4322  %mask = bitcast <16 x i1> %mask_b to i16
4323  ret i16 %mask
4324}
4325
; Truncates two <4 x i32> inputs to <4 x i1>, ANDs the masks, and sign-extends
; the result back to <4 x i32>. The checks expect the whole computation to
; stay in xmm registers (vpand, vpslld $31, vpsrad $31) with no mask register.
4326define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
4327; GENERIC-LABEL: trunc_4i32_to_4i1:
4328; GENERIC:       # %bb.0:
4329; GENERIC-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
4330; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
4331; GENERIC-NEXT:    vpsrad $31, %xmm0, %xmm0 # sched: [1:1.00]
4332; GENERIC-NEXT:    retq # sched: [1:1.00]
4333;
4334; SKX-LABEL: trunc_4i32_to_4i1:
4335; SKX:       # %bb.0:
4336; SKX-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
4337; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
4338; SKX-NEXT:    vpsrad $31, %xmm0, %xmm0 # sched: [1:0.50]
4339; SKX-NEXT:    retq # sched: [7:1.00]
4340  %mask_a = trunc <4 x i32>%a to <4 x i1>
4341  %mask_b = trunc <4 x i32>%b to <4 x i1>
4342  %a_and_b = and <4 x i1>%mask_a, %mask_b
4343  %res = sext <4 x i1>%a_and_b to <4 x i32>
4344  ret <4 x i32>%res
4345}
4346
4347
; Truncates <8 x i16> to <8 x i1> and returns the mask as an i8.
; Expected sequence: vpsllw $15, vpmovw2m into %k0, then kmovd to %eax.
4348define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
4349; GENERIC-LABEL: trunc_8i16_to_8i1:
4350; GENERIC:       # %bb.0:
4351; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
4352; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
4353; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
4354; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
4355; GENERIC-NEXT:    retq # sched: [1:1.00]
4356;
4357; SKX-LABEL: trunc_8i16_to_8i1:
4358; SKX:       # %bb.0:
4359; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
4360; SKX-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:1.00]
4361; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
4362; SKX-NEXT:    # kill: def $al killed $al killed $eax
4363; SKX-NEXT:    retq # sched: [7:1.00]
4364  %mask_b = trunc <8 x i16>%a to <8 x i1>
4365  %mask = bitcast <8 x i1> %mask_b to i8
4366  ret i8 %mask
4367}
4368
; Sign-extends the inverted result of a signed less-than compare on <8 x i32>.
; The checks expect a ymm vpcmpgtd followed by vpternlogq $15 with all three
; operands equal, which performs the bitwise inversion from the xor with true.
4369define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
4370; GENERIC-LABEL: sext_8i1_8i32:
4371; GENERIC:       # %bb.0:
4372; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
4373; GENERIC-NEXT:    vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.50]
4374; GENERIC-NEXT:    retq # sched: [1:1.00]
4375;
4376; SKX-LABEL: sext_8i1_8i32:
4377; SKX:       # %bb.0:
4378; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
4379; SKX-NEXT:    vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.33]
4380; SKX-NEXT:    retq # sched: [7:1.00]
4381  %x = icmp slt <8 x i32> %a1, %a2
4382  %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
4383  %y = sext <8 x i1> %x1 to <8 x i32>
4384  ret <8 x i32> %y
4385}
4386
4387
; Truncates an i32 to i1, inserts it as element 0 of a constant <16 x i1>
; vector, and returns the vector as an i16. The checks expect the constant to
; be loaded into %k0, bit 0 cleared with a kshiftrw/kshiftlw pair, and the
; masked input bit (andl $1) merged in with korw.
4388define i16 @trunc_i32_to_i1(i32 %a) {
4389; GENERIC-LABEL: trunc_i32_to_i1:
4390; GENERIC:       # %bb.0:
4391; GENERIC-NEXT:    movw $-4, %ax # sched: [1:0.33]
4392; GENERIC-NEXT:    kmovd %eax, %k0 # sched: [1:0.33]
4393; GENERIC-NEXT:    kshiftrw $1, %k0, %k0 # sched: [1:1.00]
4394; GENERIC-NEXT:    kshiftlw $1, %k0, %k0 # sched: [1:1.00]
4395; GENERIC-NEXT:    andl $1, %edi # sched: [1:0.33]
4396; GENERIC-NEXT:    kmovw %edi, %k1 # sched: [1:0.33]
4397; GENERIC-NEXT:    korw %k1, %k0, %k0 # sched: [1:0.33]
4398; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
4399; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
4400; GENERIC-NEXT:    retq # sched: [1:1.00]
4401;
4402; SKX-LABEL: trunc_i32_to_i1:
4403; SKX:       # %bb.0:
4404; SKX-NEXT:    movw $-4, %ax # sched: [1:0.25]
4405; SKX-NEXT:    kmovd %eax, %k0 # sched: [1:1.00]
4406; SKX-NEXT:    kshiftrw $1, %k0, %k0 # sched: [3:1.00]
4407; SKX-NEXT:    kshiftlw $1, %k0, %k0 # sched: [3:1.00]
4408; SKX-NEXT:    andl $1, %edi # sched: [1:0.25]
4409; SKX-NEXT:    kmovw %edi, %k1 # sched: [1:1.00]
4410; SKX-NEXT:    korw %k1, %k0, %k0 # sched: [1:1.00]
4411; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
4412; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
4413; SKX-NEXT:    retq # sched: [7:1.00]
4414  %a_i = trunc i32 %a to i1
4415  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
4416  %res = bitcast <16 x i1> %maskv to i16
4417  ret i16 %res
4418}
4419
; Sign-extends the result of a signed less-than compare on <8 x i32> to
; <8 x i16>. Expected: vpcmpgtd writing %k0, vpmovm2w into %xmm0, and a
; vzeroupper before the return.
4420define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
4421; GENERIC-LABEL: sext_8i1_8i16:
4422; GENERIC:       # %bb.0:
4423; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50]
4424; GENERIC-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.33]
4425; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
4426; GENERIC-NEXT:    retq # sched: [1:1.00]
4427;
4428; SKX-LABEL: sext_8i1_8i16:
4429; SKX:       # %bb.0:
4430; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
4431; SKX-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.25]
4432; SKX-NEXT:    vzeroupper # sched: [4:1.00]
4433; SKX-NEXT:    retq # sched: [7:1.00]
4434  %x = icmp slt <8 x i32> %a1, %a2
4435  %y = sext <8 x i1> %x to <8 x i16>
4436  ret <8 x i16> %y
4437}
4438
; Sign-extends the result of a signed less-than compare on <16 x i32> to
; <16 x i32>. Expected: zmm vpcmpgtd writing %k0, then vpmovm2d into %zmm0.
4439define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
4440; GENERIC-LABEL: sext_16i1_16i32:
4441; GENERIC:       # %bb.0:
4442; GENERIC-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0 # sched: [1:0.50]
4443; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
4444; GENERIC-NEXT:    retq # sched: [1:1.00]
4445;
4446; SKX-LABEL: sext_16i1_16i32:
4447; SKX:       # %bb.0:
4448; SKX-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
4449; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
4450; SKX-NEXT:    retq # sched: [7:1.00]
4451  %x = icmp slt <16 x i32> %a1, %a2
4452  %y = sext <16 x i1> %x to <16 x i32>
4453  ret <16 x i32> %y
4454}
4455
; Sign-extends the result of a signed less-than compare on <8 x i32> to
; <8 x i64>. Expected: ymm vpcmpgtd writing %k0, then vpmovm2q into %zmm0.
4456define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
4457; GENERIC-LABEL: sext_8i1_8i64:
4458; GENERIC:       # %bb.0:
4459; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50]
4460; GENERIC-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.33]
4461; GENERIC-NEXT:    retq # sched: [1:1.00]
4462;
4463; SKX-LABEL: sext_8i1_8i64:
4464; SKX:       # %bb.0:
4465; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
4466; SKX-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.25]
4467; SKX-NEXT:    retq # sched: [7:1.00]
4468  %x = icmp slt <8 x i32> %a1, %a2
4469  %y = sext <8 x i1> %x to <8 x i64>
4470  ret <8 x i64> %y
4471}
4472
; Loads <8 x i8> from %a, sign-extends it to <8 x i64>, and stores the result
; to %res. The checks expect the load to be folded into vpmovsxbq, followed by
; a vmovdqa64 store and a vzeroupper before the return.
4473define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
4474; GENERIC-LABEL: extload_v8i64:
4475; GENERIC:       # %bb.0:
4476; GENERIC-NEXT:    vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00]
4477; GENERIC-NEXT:    vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00]
4478; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
4479; GENERIC-NEXT:    retq # sched: [1:1.00]
4480;
4481; SKX-LABEL: extload_v8i64:
4482; SKX:       # %bb.0:
4483; SKX-NEXT:    vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00]
4484; SKX-NEXT:    vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00]
4485; SKX-NEXT:    vzeroupper # sched: [4:1.00]
4486; SKX-NEXT:    retq # sched: [7:1.00]
4487  %sign_load = load <8 x i8>, <8 x i8>* %a
4488  %c = sext <8 x i8> %sign_load to <8 x i64>
4489  store <8 x i64> %c, <8 x i64>* %res
4490  ret void
4491}
4492
; Selects between %x and zero per lane under a <64 x i1> mask.
; The <64 x i16> value occupies %zmm0 and %zmm1 in the expected output: the
; mask is built with vpsllw $7 + vpmovb2m, the first register is zero-masked
; with vmovdqu16, and kshiftrq $32 moves the upper mask bits down before the
; second register is zero-masked.
4493define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
4494; GENERIC-LABEL: test21:
4495; GENERIC:       # %bb.0:
4496; GENERIC-NEXT:    vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00]
4497; GENERIC-NEXT:    vpmovb2m %zmm2, %k1 # sched: [1:0.33]
4498; GENERIC-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
4499; GENERIC-NEXT:    kshiftrq $32, %k1, %k1 # sched: [1:1.00]
4500; GENERIC-NEXT:    vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.50]
4501; GENERIC-NEXT:    retq # sched: [1:1.00]
4502;
4503; SKX-LABEL: test21:
4504; SKX:       # %bb.0:
4505; SKX-NEXT:    vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00]
4506; SKX-NEXT:    vpmovb2m %zmm2, %k1 # sched: [1:1.00]
4507; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
4508; SKX-NEXT:    kshiftrq $32, %k1, %k1 # sched: [3:1.00]
4509; SKX-NEXT:    vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.33]
4510; SKX-NEXT:    retq # sched: [7:1.00]
4511  %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
4512  ret <64 x i16> %ret
4513}
4514
; Interleaves each byte of %a with a zero byte via shufflevector and bitcasts
; the result to <16 x i16>. The checks expect this to be recognized as a
; single vpmovzxbw from %xmm0 to %ymm0.
4515define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
4516; GENERIC-LABEL: shuffle_zext_16x8_to_16x16:
4517; GENERIC:       # %bb.0:
4518; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
4519; GENERIC-NEXT:    retq # sched: [1:1.00]
4520;
4521; SKX-LABEL: shuffle_zext_16x8_to_16x16:
4522; SKX:       # %bb.0:
4523; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
4524; SKX-NEXT:    retq # sched: [7:1.00]
4525  %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
4526  %2 = bitcast <32 x i8> %1 to <16 x i16>
4527  ret <16 x i16> %2
4528}
4529
; Same byte/zero interleave as a shuffle-based zero extension to <16 x i16>,
; followed by a select against zero under %mask. The checks expect the mask to
; be built with vpsllw $7 + vpmovb2m and the select folded into a zero-masked
; vpmovzxbw.
4530define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone {
4531; GENERIC-LABEL: shuffle_zext_16x8_to_16x16_mask:
4532; GENERIC:       # %bb.0:
4533; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
4534; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
4535; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
4536; GENERIC-NEXT:    retq # sched: [1:1.00]
4537;
4538; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask:
4539; SKX:       # %bb.0:
4540; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
4541; SKX-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:1.00]
4542; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
4543; SKX-NEXT:    retq # sched: [7:1.00]
4544  %x   = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
4545  %bc  = bitcast <32 x i8> %x to <16 x i16>
4546  %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer
4547  ret <16 x i16> %ret
4548}
4549
; Interleaves the low 16 bytes of a <32 x i8> input with zero bytes and
; bitcasts to <16 x i16>. Expected to lower to a single vpmovzxbw reading
; %xmm0.
4550define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) {
4551; GENERIC-LABEL: zext_32x8_to_16x16:
4552; GENERIC:       # %bb.0:
4553; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
4554; GENERIC-NEXT:    retq # sched: [1:1.00]
4555;
4556; SKX-LABEL: zext_32x8_to_16x16:
4557; SKX:       # %bb.0:
4558; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
4559; SKX-NEXT:    retq # sched: [7:1.00]
4560  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32>
4561  %2 = bitcast <32 x i8> %1 to <16 x i16>
4562  ret <16 x i16> %2
4563}
4564
; Places each of the low 8 bytes of a <32 x i8> input ahead of three zero
; bytes and bitcasts to <8 x i32>. Expected to lower to a single vpmovzxbd.
4565define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
4566; GENERIC-LABEL: zext_32x8_to_8x32:
4567; GENERIC:       # %bb.0:
4568; GENERIC-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
4569; GENERIC-NEXT:    retq # sched: [1:1.00]
4570;
4571; SKX-LABEL: zext_32x8_to_8x32:
4572; SKX:       # %bb.0:
4573; SKX-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
4574; SKX-NEXT:    retq # sched: [7:1.00]
4575  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
4576  %2 = bitcast <32 x i8> %1 to <8 x i32>
4577  ret <8 x i32> %2
4578}
4579
; Places each of the low 4 bytes of a <32 x i8> input ahead of seven zero
; bytes and bitcasts to <4 x i64>. Expected to lower to a single vpmovzxbq.
4580define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
4581; GENERIC-LABEL: zext_32x8_to_4x64:
4582; GENERIC:       # %bb.0:
4583; GENERIC-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
4584; GENERIC-NEXT:    retq # sched: [1:1.00]
4585;
4586; SKX-LABEL: zext_32x8_to_4x64:
4587; SKX:       # %bb.0:
4588; SKX-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
4589; SKX-NEXT:    retq # sched: [7:1.00]
4590  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
4591  %2 = bitcast <32 x i8> %1 to <4 x i64>
4592  ret <4 x i64> %2
4593}
4594
; Interleaves the low 8 words of a <16 x i16> input with zero words and
; bitcasts to <8 x i32>. Expected to lower to a single vpmovzxwd.
4595define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
4596; GENERIC-LABEL: zext_16x16_to_8x32:
4597; GENERIC:       # %bb.0:
4598; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
4599; GENERIC-NEXT:    retq # sched: [1:1.00]
4600;
4601; SKX-LABEL: zext_16x16_to_8x32:
4602; SKX:       # %bb.0:
4603; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
4604; SKX-NEXT:    retq # sched: [7:1.00]
4605  %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
4606  %2 = bitcast <16 x i16> %1 to <8 x i32>
4607  ret <8 x i32> %2
4608}
4609
; Places each of the low 4 words of a <16 x i16> input ahead of three zero
; words and bitcasts to <4 x i64>. Expected to lower to a single vpmovzxwq.
4610define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
4611; GENERIC-LABEL: zext_16x16_to_4x64:
4612; GENERIC:       # %bb.0:
4613; GENERIC-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
4614; GENERIC-NEXT:    retq # sched: [1:1.00]
4615;
4616; SKX-LABEL: zext_16x16_to_4x64:
4617; SKX:       # %bb.0:
4618; SKX-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
4619; SKX-NEXT:    retq # sched: [7:1.00]
4620  %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
4621  %2 = bitcast <16 x i16> %1 to <4 x i64>
4622  ret <4 x i64> %2
4623}
4624
; Interleaves the low 4 dwords of an <8 x i32> input with zero dwords and
; bitcasts to <4 x i64>. Expected to lower to a single vpmovzxdq.
4625define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
4626; GENERIC-LABEL: zext_8x32_to_4x64:
4627; GENERIC:       # %bb.0:
4628; GENERIC-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
4629; GENERIC-NEXT:    retq # sched: [1:1.00]
4630;
4631; SKX-LABEL: zext_8x32_to_4x64:
4632; SKX:       # %bb.0:
4633; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
4634; SKX-NEXT:    retq # sched: [7:1.00]
4635  %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
4636  %2 = bitcast <8 x i32> %1 to <4 x i64>
4637  ret <4 x i64> %2
4638}
4639
; Zero-extends the result of an equality compare on <64 x i8> back to
; <64 x i8>. Expected: vpcmpeqb writing %k1, then a zero-masked vmovdqu8 load
; from a RIP-relative constant. Attribute group #0 is defined outside this
; part of the file.
4640define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
4641; GENERIC-LABEL: zext_64xi1_to_64xi8:
4642; GENERIC:       # %bb.0:
4643; GENERIC-NEXT:    vpcmpeqb %zmm1, %zmm0, %k1 # sched: [1:0.50]
4644; GENERIC-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [7:0.50]
4645; GENERIC-NEXT:    retq # sched: [1:1.00]
4646;
4647; SKX-LABEL: zext_64xi1_to_64xi8:
4648; SKX:       # %bb.0:
4649; SKX-NEXT:    vpcmpeqb %zmm1, %zmm0, %k1 # sched: [3:1.00]
4650; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
4651; SKX-NEXT:    retq # sched: [7:1.00]
4652  %mask = icmp eq <64 x i8> %x, %y
4653  %1 = zext <64 x i1> %mask to <64 x i8>
4654  ret <64 x i8> %1
4655}
4656
; Zero-extends the result of an equality compare on <32 x i16> back to
; <32 x i16>. Expected: vpcmpeqw writing %k0, vpmovm2w into %zmm0, then
; vpsrlw $15.
4657define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
4658; GENERIC-LABEL: zext_32xi1_to_32xi16:
4659; GENERIC:       # %bb.0:
4660; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # sched: [1:0.50]
4661; GENERIC-NEXT:    vpmovm2w %k0, %zmm0 # sched: [1:0.33]
4662; GENERIC-NEXT:    vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00]
4663; GENERIC-NEXT:    retq # sched: [1:1.00]
4664;
4665; SKX-LABEL: zext_32xi1_to_32xi16:
4666; SKX:       # %bb.0:
4667; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00]
4668; SKX-NEXT:    vpmovm2w %k0, %zmm0 # sched: [1:0.25]
4669; SKX-NEXT:    vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00]
4670; SKX-NEXT:    retq # sched: [7:1.00]
4671  %mask = icmp eq <32 x i16> %x, %y
4672  %1 = zext <32 x i1> %mask to <32 x i16>
4673  ret <32 x i16> %1
4674}
4675
; Zero-extends the result of an equality compare on <16 x i16> back to
; <16 x i16>. The checks expect a ymm vpcmpeqw followed by vpsrlw $15, with
; no mask register involved.
4676define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
4677; GENERIC-LABEL: zext_16xi1_to_16xi16:
4678; GENERIC:       # %bb.0:
4679; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4680; GENERIC-NEXT:    vpsrlw $15, %ymm0, %ymm0 # sched: [1:1.00]
4681; GENERIC-NEXT:    retq # sched: [1:1.00]
4682;
4683; SKX-LABEL: zext_16xi1_to_16xi16:
4684; SKX:       # %bb.0:
4685; SKX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4686; SKX-NEXT:    vpsrlw $15, %ymm0, %ymm0 # sched: [1:0.50]
4687; SKX-NEXT:    retq # sched: [7:1.00]
4688  %mask = icmp eq <16 x i16> %x, %y
4689  %1 = zext <16 x i1> %mask to <16 x i16>
4690  ret <16 x i16> %1
4691}
4692
4693
; Zero-extends the result of an equality compare on <32 x i16> to <32 x i8>.
; Expected: vpcmpeqw writing %k1, then a zero-masked vmovdqu8 load of a
; RIP-relative constant into %ymm0.
4694define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
4695; GENERIC-LABEL: zext_32xi1_to_32xi8:
4696; GENERIC:       # %bb.0:
4697; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1 # sched: [1:0.50]
4698; GENERIC-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [7:0.50]
4699; GENERIC-NEXT:    retq # sched: [1:1.00]
4700;
4701; SKX-LABEL: zext_32xi1_to_32xi8:
4702; SKX:       # %bb.0:
4703; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
4704; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
4705; SKX-NEXT:    retq # sched: [7:1.00]
4706  %mask = icmp eq <32 x i16> %x, %y
4707  %1 = zext <32 x i1> %mask to <32 x i8>
4708  ret <32 x i8> %1
4709}
4710
; Zero-extends the result of an equality compare on <4 x i8> to <4 x i32>.
; In the expected output both operands are first ANDed with a constant that
; keeps only the low byte of each dword, then compared with vpcmpeqd and
; shifted right by 31.
4711define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
4712; GENERIC-LABEL: zext_4xi1_to_4x32:
4713; GENERIC:       # %bb.0:
4714; GENERIC-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50]
4715; GENERIC-NEXT:    vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
4716; GENERIC-NEXT:    vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
4717; GENERIC-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
4718; GENERIC-NEXT:    vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
4719; GENERIC-NEXT:    retq # sched: [1:1.00]
4720;
4721; SKX-LABEL: zext_4xi1_to_4x32:
4722; SKX:       # %bb.0:
4723; SKX-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50]
4724; SKX-NEXT:    vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
4725; SKX-NEXT:    vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
4726; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
4727; SKX-NEXT:    vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
4728; SKX-NEXT:    retq # sched: [7:1.00]
4729  %mask = icmp eq <4 x i8> %x, %y
4730  %1 = zext <4 x i1> %mask to <4 x i32>
4731  ret <4 x i32> %1
4732}
4733
; Zero-extends the result of an equality compare on <2 x i8> to <2 x i64>.
; In the expected output both operands are first ANDed with a constant that
; keeps only the low byte of each qword, then compared with vpcmpeqq and
; shifted right by 63.
4734define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
4735; GENERIC-LABEL: zext_2xi1_to_2xi64:
4736; GENERIC:       # %bb.0:
4737; GENERIC-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50]
4738; GENERIC-NEXT:    vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
4739; GENERIC-NEXT:    vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
4740; GENERIC-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
4741; GENERIC-NEXT:    vpsrlq $63, %xmm0, %xmm0 # sched: [1:1.00]
4742; GENERIC-NEXT:    retq # sched: [1:1.00]
4743;
4744; SKX-LABEL: zext_2xi1_to_2xi64:
4745; SKX:       # %bb.0:
4746; SKX-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50]
4747; SKX-NEXT:    vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
4748; SKX-NEXT:    vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
4749; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
4750; SKX-NEXT:    vpsrlq $63, %xmm0, %xmm0 # sched: [1:0.50]
4751; SKX-NEXT:    retq # sched: [7:1.00]
4752  %mask = icmp eq <2 x i8> %x, %y
4753  %1 = zext <2 x i1> %mask to <2 x i64>
4754  ret <2 x i64> %1
4755}
4756
; Computes (%a0 * %a1) + %a2 on <16 x float> with separate fmul and fadd.
; The checks expect the two operations to stay separate: a zmm vmulps
; followed by a zmm vaddps.
4757define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
4758; GENERIC-LABEL: test_x86_fmadd_ps_z:
4759; GENERIC:       # %bb.0:
4760; GENERIC-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
4761; GENERIC-NEXT:    vaddps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
4762; GENERIC-NEXT:    retq # sched: [1:1.00]
4763;
4764; SKX-LABEL: test_x86_fmadd_ps_z:
4765; SKX:       # %bb.0:
4766; SKX-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
4767; SKX-NEXT:    vaddps %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
4768; SKX-NEXT:    retq # sched: [7:1.00]
4769  %x = fmul <16 x float> %a0, %a1
4770  %res = fadd <16 x float> %x, %a2
4771  ret <16 x float> %res
4772}
4773
; Computes (%a0 * %a1) - %a2 on <16 x float> with separate fmul and fsub.
; Expected: a zmm vmulps followed by a zmm vsubps.
4774define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
4775; GENERIC-LABEL: test_x86_fmsub_ps_z:
4776; GENERIC:       # %bb.0:
4777; GENERIC-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
4778; GENERIC-NEXT:    vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
4779; GENERIC-NEXT:    retq # sched: [1:1.00]
4780;
4781; SKX-LABEL: test_x86_fmsub_ps_z:
4782; SKX:       # %bb.0:
4783; SKX-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
4784; SKX-NEXT:    vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
4785; SKX-NEXT:    retq # sched: [7:1.00]
4786  %x = fmul <16 x float> %a0, %a1
4787  %res = fsub <16 x float> %x, %a2
4788  ret <16 x float> %res
4789}
4790
; Computes %a2 - (%a0 * %a1) on <16 x float>.
; Expected: a zmm vmulps followed by a vsubps that subtracts the product
; (%zmm0) from %zmm2.
4791define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
4792; GENERIC-LABEL: test_x86_fnmadd_ps_z:
4793; GENERIC:       # %bb.0:
4794; GENERIC-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
4795; GENERIC-NEXT:    vsubps %zmm0, %zmm2, %zmm0 # sched: [3:1.00]
4796; GENERIC-NEXT:    retq # sched: [1:1.00]
4797;
4798; SKX-LABEL: test_x86_fnmadd_ps_z:
4799; SKX:       # %bb.0:
4800; SKX-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
4801; SKX-NEXT:    vsubps %zmm0, %zmm2, %zmm0 # sched: [4:0.50]
4802; SKX-NEXT:    retq # sched: [7:1.00]
4803  %x = fmul <16 x float> %a0, %a1
4804  %res = fsub <16 x float> %a2, %x
4805  ret <16 x float> %res
4806}
4807
; Computes (-0.0 - (%a0 * %a1)) - %a2 on <16 x float>.
; The checks expect the subtraction from -0.0 to be emitted as a vxorps
; against a {1to16} broadcast constant, between the vmulps and the final
; vsubps.
4808define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
4809; GENERIC-LABEL: test_x86_fnmsub_ps_z:
4810; GENERIC:       # %bb.0:
4811; GENERIC-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
4812; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
4813; GENERIC-NEXT:    vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
4814; GENERIC-NEXT:    retq # sched: [1:1.00]
4815;
4816; SKX-LABEL: test_x86_fnmsub_ps_z:
4817; SKX:       # %bb.0:
4818; SKX-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
4819; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
4820; SKX-NEXT:    vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
4821; SKX-NEXT:    retq # sched: [7:1.00]
4822  %x = fmul <16 x float> %a0, %a1
4823  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
4824                          float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
4825                          float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
4826                          float -0.000000e+00>, %x
4827  %res = fsub <16 x float> %y, %a2
4828  ret <16 x float> %res
4829}
4830
; Computes (%a0 * %a1) + %a2 on <8 x double> with separate fmul and fadd.
; Expected: a zmm vmulpd followed by a zmm vaddpd.
4831define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
4832; GENERIC-LABEL: test_x86_fmadd_pd_z:
4833; GENERIC:       # %bb.0:
4834; GENERIC-NEXT:    vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
4835; GENERIC-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
4836; GENERIC-NEXT:    retq # sched: [1:1.00]
4837;
4838; SKX-LABEL: test_x86_fmadd_pd_z:
4839; SKX:       # %bb.0:
4840; SKX-NEXT:    vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
4841; SKX-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
4842; SKX-NEXT:    retq # sched: [7:1.00]
4843  %x = fmul <8 x double> %a0, %a1
4844  %res = fadd <8 x double> %x, %a2
4845  ret <8 x double> %res
4846}
4847
; Computes (%a0 * %a1) - %a2 on <8 x double> with separate fmul and fsub.
; Expected: a zmm vmulpd followed by a zmm vsubpd.
4848define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
4849; GENERIC-LABEL: test_x86_fmsub_pd_z:
4850; GENERIC:       # %bb.0:
4851; GENERIC-NEXT:    vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
4852; GENERIC-NEXT:    vsubpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
4853; GENERIC-NEXT:    retq # sched: [1:1.00]
4854;
4855; SKX-LABEL: test_x86_fmsub_pd_z:
4856; SKX:       # %bb.0:
4857; SKX-NEXT:    vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
4858; SKX-NEXT:    vsubpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
4859; SKX-NEXT:    retq # sched: [7:1.00]
4860  %x = fmul <8 x double> %a0, %a1
4861  %res = fsub <8 x double> %x, %a2
4862  ret <8 x double> %res
4863}
4864
; Scalar double version: computes (%a0 * %a1) - %a2.
; Expected: vmulsd followed by vsubsd, all operands in xmm registers.
4865define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
4866; GENERIC-LABEL: test_x86_fmsub_213:
4867; GENERIC:       # %bb.0:
4868; GENERIC-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
4869; GENERIC-NEXT:    vsubsd %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
4870; GENERIC-NEXT:    retq # sched: [1:1.00]
4871;
4872; SKX-LABEL: test_x86_fmsub_213:
4873; SKX:       # %bb.0:
4874; SKX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
4875; SKX-NEXT:    vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.50]
4876; SKX-NEXT:    retq # sched: [7:1.00]
4877  %x = fmul double %a0, %a1
4878  %res = fsub double %x, %a2
4879  ret double %res
4880}
4881
; Scalar double: computes (%a0 * %a1) - *%a2_ptr.
; The checks expect the load of the subtrahend to be folded into vsubsd as a
; (%rdi) memory operand.
4882define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
4883; GENERIC-LABEL: test_x86_fmsub_213_m:
4884; GENERIC:       # %bb.0:
4885; GENERIC-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
4886; GENERIC-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
4887; GENERIC-NEXT:    retq # sched: [1:1.00]
4888;
4889; SKX-LABEL: test_x86_fmsub_213_m:
4890; SKX:       # %bb.0:
4891; SKX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
4892; SKX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
4893; SKX-NEXT:    retq # sched: [7:1.00]
4894  %a2 = load double , double *%a2_ptr
4895  %x = fmul double %a0, %a1
4896  %res = fsub double %x, %a2
4897  ret double %res
4898}
4899
; Scalar double: computes (%a0 * *%a2_ptr) - %a1.
; The checks expect the load of the multiplicand to be folded into vmulsd as
; a (%rdi) memory operand, followed by a register vsubsd.
4900define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
4901; GENERIC-LABEL: test_x86_fmsub_231_m:
4902; GENERIC:       # %bb.0:
4903; GENERIC-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
4904; GENERIC-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
4905; GENERIC-NEXT:    retq # sched: [1:1.00]
4906;
4907; SKX-LABEL: test_x86_fmsub_231_m:
4908; SKX:       # %bb.0:
4909; SKX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
4910; SKX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
4911; SKX-NEXT:    retq # sched: [7:1.00]
4912  %a2 = load double , double *%a2_ptr
4913  %x = fmul double %a0, %a2
4914  %res = fsub double %x, %a1
4915  ret double %res
4916}
4917
; Multiplies %a1 by a splat constant and adds %a2.
; The checks expect the splat to appear as a {1to16} broadcast memory operand
; on vmulps, followed by a register vaddps.
4918define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
4919; GENERIC-LABEL: test231_br:
4920; GENERIC:       # %bb.0:
4921; GENERIC-NEXT:    vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [12:1.00]
4922; GENERIC-NEXT:    vaddps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
4923; GENERIC-NEXT:    retq # sched: [1:1.00]
4924;
4925; SKX-LABEL: test231_br:
4926; SKX:       # %bb.0:
4927; SKX-NEXT:    vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
4928; SKX-NEXT:    vaddps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
4929; SKX-NEXT:    retq # sched: [7:1.00]
4930  %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
4931  %b2 = fadd <16 x float> %b1, %a2
4932  ret <16 x float> %b2
4933}
4934
; Multiplies %a1 by %a2 and adds a splat constant.
; The checks expect a register vmulps followed by a vaddps whose constant is
; a {1to16} broadcast memory operand.
4935define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
4936; GENERIC-LABEL: test213_br:
4937; GENERIC:       # %bb.0:
4938; GENERIC-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
4939; GENERIC-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00]
4940; GENERIC-NEXT:    retq # sched: [1:1.00]
4941;
4942; SKX-LABEL: test213_br:
4943; SKX:       # %bb.0:
4944; SKX-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
4945; SKX-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
4946; SKX-NEXT:    retq # sched: [7:1.00]
4947  %b1 = fmul <16 x float> %a1, %a2
4948  %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
4949  ret <16 x float> %b2
4950}
4951
4952;mask (a*c+b , a)
; Loads %a2 from %a2_ptrt (align 1), computes %a0 * %a2 + %a1, and selects per
; lane of %mask between that result and %a0. The assertions expect the
; <16 x i1> mask to be moved from %xmm2 into %k1 (vpsllw $7 + vpmovb2m), the
; load folded into vmulps, and the select folded into a vaddps masked by %k1
; that writes %zmm0.
4953define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
4954; GENERIC-LABEL: test_x86_fmadd132_ps:
4955; GENERIC:       # %bb.0:
4956; GENERIC-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
4957; GENERIC-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:0.33]
4958; GENERIC-NEXT:    vmulps (%rdi), %zmm0, %zmm2 # sched: [12:1.00]
4959; GENERIC-NEXT:    vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
4960; GENERIC-NEXT:    retq # sched: [1:1.00]
4961;
4962; SKX-LABEL: test_x86_fmadd132_ps:
4963; SKX:       # %bb.0:
4964; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50]
4965; SKX-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:1.00]
4966; SKX-NEXT:    vmulps (%rdi), %zmm0, %zmm2 # sched: [11:0.50]
4967; SKX-NEXT:    vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [4:0.50]
4968; SKX-NEXT:    retq # sched: [7:1.00]
4969  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
4970  %x = fmul <16 x float> %a0, %a2
4971  %y = fadd <16 x float> %x, %a1
4972  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
4973  ret <16 x float> %res
4974}
4975
4976;mask (a*c+b , b)
; Loads %a2 from %a2_ptrt (align 1), computes %a0 * %a2 + %a1, and selects per
; lane of %mask between that result and %a1. The assertions expect the mask to
; be moved into %k1 (vpsllw $7 + vpmovb2m), the load folded into vmulps, a
; vaddps masked by %k1 that writes %zmm1, and a final vmovaps copying %zmm1
; into %zmm0.
4977define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
4978; GENERIC-LABEL: test_x86_fmadd231_ps:
4979; GENERIC:       # %bb.0:
4980; GENERIC-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
4981; GENERIC-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:0.33]
4982; GENERIC-NEXT:    vmulps (%rdi), %zmm0, %zmm0 # sched: [12:1.00]
4983; GENERIC-NEXT:    vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [3:1.00]
4984; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
4985; GENERIC-NEXT:    retq # sched: [1:1.00]
4986;
4987; SKX-LABEL: test_x86_fmadd231_ps:
4988; SKX:       # %bb.0:
4989; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50]
4990; SKX-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:1.00]
4991; SKX-NEXT:    vmulps (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
4992; SKX-NEXT:    vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [4:0.50]
4993; SKX-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:0.33]
4994; SKX-NEXT:    retq # sched: [7:1.00]
4995  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
4996  %x = fmul <16 x float> %a0, %a2
4997  %y = fadd <16 x float> %x, %a1
4998  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
4999  ret <16 x float> %res
5000}
5001
5002;mask (b*a+c , b)
; Loads %a2 from %a2_ptrt (align 1), computes %a1 * %a0 + %a2, and selects per
; lane of %mask between that result and %a1. The assertions expect the mask to
; be moved into %k1 (vpsllw $7 + vpmovb2m), a register-register vmulps, the
; load folded into a vaddps masked by %k1 that writes %zmm1, and a final
; vmovaps copying %zmm1 into %zmm0.
5003define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
5004; GENERIC-LABEL: test_x86_fmadd213_ps:
5005; GENERIC:       # %bb.0:
5006; GENERIC-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
5007; GENERIC-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:0.33]
5008; GENERIC-NEXT:    vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
5009; GENERIC-NEXT:    vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [10:1.00]
5010; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
5011; GENERIC-NEXT:    retq # sched: [1:1.00]
5012;
5013; SKX-LABEL: test_x86_fmadd213_ps:
5014; SKX:       # %bb.0:
5015; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50]
5016; SKX-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:1.00]
5017; SKX-NEXT:    vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
5018; SKX-NEXT:    vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [11:0.50]
5019; SKX-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:0.33]
5020; SKX-NEXT:    retq # sched: [7:1.00]
5021  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
5022  %x = fmul <16 x float> %a1, %a0
5023  %y = fadd <16 x float> %x, %a2
5024  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
5025  ret <16 x float> %res
5026}
5027
; Adds a splat of 2 to %a, then ands the sum with %b (<16 x i32>). The
; assertions expect vpaddd with a {1to16} broadcast memory operand followed by
; vpandq on zmm registers.
5028define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
5029; GENERIC-LABEL: vpandd:
5030; GENERIC:       # %bb.0: # %entry
5031; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
5032; GENERIC-NEXT:    vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5033; GENERIC-NEXT:    retq # sched: [1:1.00]
5034;
5035; SKX-LABEL: vpandd:
5036; SKX:       # %bb.0: # %entry
5037; SKX-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
5038; SKX-NEXT:    vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
5039; SKX-NEXT:    retq # sched: [7:1.00]
5040entry:
5041  ; Force the execution domain with an add.
5042  %a2 = add <16 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2,
5043                            i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
5044  %x = and <16 x i32> %a2, %b
5045  ret <16 x i32> %x
5046}
5047
; Adds a splat of 3 to %a, inverts %b with an xor against all-ones, then ands
; the two (<16 x i32>). The assertions expect vpaddd with a {1to16} broadcast
; memory operand followed by a single vpandnq covering the invert-and-and.
5048define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
5049; GENERIC-LABEL: vpandnd:
5050; GENERIC:       # %bb.0: # %entry
5051; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
5052; GENERIC-NEXT:    vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
5053; GENERIC-NEXT:    retq # sched: [1:1.00]
5054;
5055; SKX-LABEL: vpandnd:
5056; SKX:       # %bb.0: # %entry
5057; SKX-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
5058; SKX-NEXT:    vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
5059; SKX-NEXT:    retq # sched: [7:1.00]
5060entry:
5061  ; Force the execution domain with an add.
5062  %a2 = add <16 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
5063                            i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
5064  %b2 = xor <16 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1,
5065                            i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
5066  %x = and <16 x i32> %a2, %b2
5067  ret <16 x i32> %x
5068}
5069
; Adds a splat of 4 to %a, then ors the sum with %b (<16 x i32>). The
; assertions expect vpaddd with a {1to16} broadcast memory operand followed by
; vporq on zmm registers.
5070define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
5071; GENERIC-LABEL: vpord:
5072; GENERIC:       # %bb.0: # %entry
5073; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
5074; GENERIC-NEXT:    vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5075; GENERIC-NEXT:    retq # sched: [1:1.00]
5076;
5077; SKX-LABEL: vpord:
5078; SKX:       # %bb.0: # %entry
5079; SKX-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
5080; SKX-NEXT:    vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
5081; SKX-NEXT:    retq # sched: [7:1.00]
5082entry:
5083  ; Force the execution domain with an add.
5084  %a2 = add <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4,
5085                            i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
5086  %x = or <16 x i32> %a2, %b
5087  ret <16 x i32> %x
5088}
5089
; Adds a splat of 5 to %a, then xors the sum with %b (<16 x i32>). The
; assertions expect vpaddd with a {1to16} broadcast memory operand followed by
; vpxorq on zmm registers.
5090define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
5091; GENERIC-LABEL: vpxord:
5092; GENERIC:       # %bb.0: # %entry
5093; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
5094; GENERIC-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5095; GENERIC-NEXT:    retq # sched: [1:1.00]
5096;
5097; SKX-LABEL: vpxord:
5098; SKX:       # %bb.0: # %entry
5099; SKX-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
5100; SKX-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
5101; SKX-NEXT:    retq # sched: [7:1.00]
5102entry:
5103  ; Force the execution domain with an add.
5104  %a2 = add <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5,
5105                            i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
5106  %x = xor <16 x i32> %a2, %b
5107  ret <16 x i32> %x
5108}
5109
; Adds a splat of 6 to %a, then ands the sum with %b (<8 x i64>). The
; assertions expect vpaddq with a {1to8} broadcast memory operand followed by
; vpandq on zmm registers.
5110define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
5111; GENERIC-LABEL: vpandq:
5112; GENERIC:       # %bb.0: # %entry
5113; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
5114; GENERIC-NEXT:    vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5115; GENERIC-NEXT:    retq # sched: [1:1.00]
5116;
5117; SKX-LABEL: vpandq:
5118; SKX:       # %bb.0: # %entry
5119; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
5120; SKX-NEXT:    vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
5121; SKX-NEXT:    retq # sched: [7:1.00]
5122entry:
5123  ; Force the execution domain with an add.
5124  %a2 = add <8 x i64> %a, <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>
5125  %x = and <8 x i64> %a2, %b
5126  ret <8 x i64> %x
5127}
5128
; Adds a splat of 7 to %a, inverts %b with an xor against all-ones, then ands
; the two (<8 x i64>). The assertions expect vpaddq with a {1to8} broadcast
; memory operand followed by a single vpandnq covering the invert-and-and.
5129define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
5130; GENERIC-LABEL: vpandnq:
5131; GENERIC:       # %bb.0: # %entry
5132; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
5133; GENERIC-NEXT:    vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
5134; GENERIC-NEXT:    retq # sched: [1:1.00]
5135;
5136; SKX-LABEL: vpandnq:
5137; SKX:       # %bb.0: # %entry
5138; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
5139; SKX-NEXT:    vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
5140; SKX-NEXT:    retq # sched: [7:1.00]
5141entry:
5142  ; Force the execution domain with an add.
5143  %a2 = add <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
5144  %b2 = xor <8 x i64> %b, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
5145  %x = and <8 x i64> %a2, %b2
5146  ret <8 x i64> %x
5147}
5148
; Adds a splat of 8 to %a, then ors the sum with %b (<8 x i64>). The
; assertions expect vpaddq with a {1to8} broadcast memory operand followed by
; vporq on zmm registers.
5149define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
5150; GENERIC-LABEL: vporq:
5151; GENERIC:       # %bb.0: # %entry
5152; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
5153; GENERIC-NEXT:    vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5154; GENERIC-NEXT:    retq # sched: [1:1.00]
5155;
5156; SKX-LABEL: vporq:
5157; SKX:       # %bb.0: # %entry
5158; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
5159; SKX-NEXT:    vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
5160; SKX-NEXT:    retq # sched: [7:1.00]
5161entry:
5162  ; Force the execution domain with an add.
5163  %a2 = add <8 x i64> %a, <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
5164  %x = or <8 x i64> %a2, %b
5165  ret <8 x i64> %x
5166}
5167
; Adds a splat of 9 to %a, then xors the sum with %b (<8 x i64>). The
; assertions expect vpaddq with a {1to8} broadcast memory operand followed by
; vpxorq on zmm registers.
5168define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
5169; GENERIC-LABEL: vpxorq:
5170; GENERIC:       # %bb.0: # %entry
5171; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
5172; GENERIC-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5173; GENERIC-NEXT:    retq # sched: [1:1.00]
5174;
5175; SKX-LABEL: vpxorq:
5176; SKX:       # %bb.0: # %entry
5177; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
5178; SKX-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
5179; SKX-NEXT:    retq # sched: [7:1.00]
5180entry:
5181  ; Force the execution domain with an add.
5182  %a2 = add <8 x i64> %a, <i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9>
5183  %x = xor <8 x i64> %a2, %b
5184  ret <8 x i64> %x
5185}
5186
; Plain bitwise and of two <64 x i8> vectors with no preceding integer op.
; The assertions expect a single vandps on zmm registers.
5187define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) {
5188; GENERIC-LABEL: and_v64i8:
5189; GENERIC:       # %bb.0:
5190; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
5191; GENERIC-NEXT:    retq # sched: [1:1.00]
5192;
5193; SKX-LABEL: and_v64i8:
5194; SKX:       # %bb.0:
5195; SKX-NEXT:    vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
5196; SKX-NEXT:    retq # sched: [7:1.00]
5197  %res = and <64 x i8> %a, %b
5198  ret <64 x i8> %res
5199}
5200
; Inverts %b with an xor against all-ones and ands it with %a (<64 x i8>).
; The assertions expect the pair to become a single vandnps on zmm registers.
5201define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) {
5202; GENERIC-LABEL: andn_v64i8:
5203; GENERIC:       # %bb.0:
5204; GENERIC-NEXT:    vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
5205; GENERIC-NEXT:    retq # sched: [1:1.00]
5206;
5207; SKX-LABEL: andn_v64i8:
5208; SKX:       # %bb.0:
5209; SKX-NEXT:    vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
5210; SKX-NEXT:    retq # sched: [7:1.00]
5211  %b2 = xor <64 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
5212                           i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
5213                           i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
5214                           i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
5215  %res = and <64 x i8> %a, %b2
5216  ret <64 x i8> %res
5217}
5218
; Plain bitwise or of two <64 x i8> vectors with no preceding integer op.
; The assertions expect a single vorps on zmm registers.
5219define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) {
5220; GENERIC-LABEL: or_v64i8:
5221; GENERIC:       # %bb.0:
5222; GENERIC-NEXT:    vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
5223; GENERIC-NEXT:    retq # sched: [1:1.00]
5224;
5225; SKX-LABEL: or_v64i8:
5226; SKX:       # %bb.0:
5227; SKX-NEXT:    vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
5228; SKX-NEXT:    retq # sched: [7:1.00]
5229  %res = or <64 x i8> %a, %b
5230  ret <64 x i8> %res
5231}
5232
; Plain bitwise xor of two <64 x i8> vectors with no preceding integer op.
; The assertions expect a single vxorps on zmm registers.
5233define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) {
5234; GENERIC-LABEL: xor_v64i8:
5235; GENERIC:       # %bb.0:
5236; GENERIC-NEXT:    vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
5237; GENERIC-NEXT:    retq # sched: [1:1.00]
5238;
5239; SKX-LABEL: xor_v64i8:
5240; SKX:       # %bb.0:
5241; SKX-NEXT:    vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
5242; SKX-NEXT:    retq # sched: [7:1.00]
5243  %res = xor <64 x i8> %a, %b
5244  ret <64 x i8> %res
5245}
5246
; Plain bitwise and of two <32 x i16> vectors with no preceding integer op.
; The assertions expect a single vandps on zmm registers.
5247define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) {
5248; GENERIC-LABEL: and_v32i16:
5249; GENERIC:       # %bb.0:
5250; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
5251; GENERIC-NEXT:    retq # sched: [1:1.00]
5252;
5253; SKX-LABEL: and_v32i16:
5254; SKX:       # %bb.0:
5255; SKX-NEXT:    vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
5256; SKX-NEXT:    retq # sched: [7:1.00]
5257  %res = and <32 x i16> %a, %b
5258  ret <32 x i16> %res
5259}
5260
; Inverts %b with an xor against all-ones and ands it with %a (<32 x i16>).
; The assertions expect the pair to become a single vandnps on zmm registers.
5261define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) {
5262; GENERIC-LABEL: andn_v32i16:
5263; GENERIC:       # %bb.0:
5264; GENERIC-NEXT:    vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
5265; GENERIC-NEXT:    retq # sched: [1:1.00]
5266;
5267; SKX-LABEL: andn_v32i16:
5268; SKX:       # %bb.0:
5269; SKX-NEXT:    vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
5270; SKX-NEXT:    retq # sched: [7:1.00]
5271  %b2 = xor <32 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
5272                            i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
5273  %res = and <32 x i16> %a, %b2
5274  ret <32 x i16> %res
5275}
5276
; Plain bitwise or of two <32 x i16> vectors with no preceding integer op.
; The assertions expect a single vorps on zmm registers.
5277define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) {
5278; GENERIC-LABEL: or_v32i16:
5279; GENERIC:       # %bb.0:
5280; GENERIC-NEXT:    vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
5281; GENERIC-NEXT:    retq # sched: [1:1.00]
5282;
5283; SKX-LABEL: or_v32i16:
5284; SKX:       # %bb.0:
5285; SKX-NEXT:    vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
5286; SKX-NEXT:    retq # sched: [7:1.00]
5287  %res = or <32 x i16> %a, %b
5288  ret <32 x i16> %res
5289}
5290
; Plain bitwise xor of two <32 x i16> vectors with no preceding integer op.
; The assertions expect a single vxorps on zmm registers.
5291define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) {
5292; GENERIC-LABEL: xor_v32i16:
5293; GENERIC:       # %bb.0:
5294; GENERIC-NEXT:    vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
5295; GENERIC-NEXT:    retq # sched: [1:1.00]
5296;
5297; SKX-LABEL: xor_v32i16:
5298; SKX:       # %bb.0:
5299; SKX-NEXT:    vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
5300; SKX-NEXT:    retq # sched: [7:1.00]
5301  %res = xor <32 x i16> %a, %b
5302  ret <32 x i16> %res
5303}
5304
; Masked bitwise and on the integer bit patterns of two <16 x float> vectors:
; %a and %b are bitcast to <16 x i32> and anded, lanes selected by %mask (i16
; bitcast to <16 x i1>) take the and result while the others take %passThru,
; and the selection is bitcast back to float and added to %c. The assertions
; expect kmovd into %k1, a vandps masked by %k1 that writes the %passThru
; register (%zmm2), and a vaddps.
5305define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
5306; GENERIC-LABEL: masked_and_v16f32:
5307; GENERIC:       # %bb.0:
5308; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5309; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
5310; GENERIC-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
5311; GENERIC-NEXT:    retq # sched: [1:1.00]
5312;
5313; SKX-LABEL: masked_and_v16f32:
5314; SKX:       # %bb.0:
5315; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5316; SKX-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
5317; SKX-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
5318; SKX-NEXT:    retq # sched: [7:1.00]
5319  %a1 = bitcast <16 x float> %a to <16 x i32>
5320  %b1 = bitcast <16 x float> %b to <16 x i32>
5321  %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
5322  %mask1 = bitcast i16 %mask to <16 x i1>
5323  %op = and <16 x i32> %a1, %b1
5324  %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1
5325  %cast = bitcast <16 x i32> %select to <16 x float>
5326  %add = fadd <16 x float> %c, %cast
5327  ret <16 x float> %add
5328}
5329
; Masked bitwise or on the integer bit patterns of two <16 x float> vectors:
; %a and %b are bitcast to <16 x i32> and ored, lanes selected by %mask (i16
; bitcast to <16 x i1>) take the or result while the others take %passThru,
; and the selection is bitcast back to float and added to %c.
;
; NOTE(review): the body used `and`, duplicating masked_and_v16f32, so the
; masked or form was not covered. The assertions were adjusted by hand to the
; vorps form, with scheduling figures taken from the masked zmm vorps
; assertions in test_mm512_mask_or_epi32; regenerate them with
; utils/update_llc_test_checks.py to confirm.
5330define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
5331; GENERIC-LABEL: masked_or_v16f32:
5332; GENERIC:       # %bb.0:
5333; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5334; GENERIC-NEXT:    vorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
5335; GENERIC-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
5336; GENERIC-NEXT:    retq # sched: [1:1.00]
5337;
5338; SKX-LABEL: masked_or_v16f32:
5339; SKX:       # %bb.0:
5340; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5341; SKX-NEXT:    vorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
5342; SKX-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
5343; SKX-NEXT:    retq # sched: [7:1.00]
5344  %a1 = bitcast <16 x float> %a to <16 x i32>
5345  %b1 = bitcast <16 x float> %b to <16 x i32>
5346  %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
5347  %mask1 = bitcast i16 %mask to <16 x i1>
5348  %op = or <16 x i32> %a1, %b1
5349  %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1
5350  %cast = bitcast <16 x i32> %select to <16 x float>
5351  %add = fadd <16 x float> %c, %cast
5352  ret <16 x float> %add
5353}
5354
; Masked bitwise xor on the integer bit patterns of two <16 x float> vectors:
; %a and %b are bitcast to <16 x i32> and xored, lanes selected by %mask (i16
; bitcast to <16 x i1>) take the xor result while the others take %passThru,
; and the selection is bitcast back to float and added to %c.
;
; NOTE(review): the body used `and`, duplicating masked_and_v16f32, so the
; masked xor form was not covered. The assertions were adjusted by hand to the
; vxorps form, with scheduling figures taken from the masked zmm vxorps
; assertions in test_mm512_mask_xor_epi32; regenerate them with
; utils/update_llc_test_checks.py to confirm.
5355define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
5356; GENERIC-LABEL: masked_xor_v16f32:
5357; GENERIC:       # %bb.0:
5358; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5359; GENERIC-NEXT:    vxorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
5360; GENERIC-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
5361; GENERIC-NEXT:    retq # sched: [1:1.00]
5362;
5363; SKX-LABEL: masked_xor_v16f32:
5364; SKX:       # %bb.0:
5365; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5366; SKX-NEXT:    vxorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
5367; SKX-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
5368; SKX-NEXT:    retq # sched: [7:1.00]
5369  %a1 = bitcast <16 x float> %a to <16 x i32>
5370  %b1 = bitcast <16 x float> %b to <16 x i32>
5371  %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
5372  %mask1 = bitcast i16 %mask to <16 x i1>
5373  %op = xor <16 x i32> %a1, %b1
5374  %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1
5375  %cast = bitcast <16 x i32> %select to <16 x float>
5376  %add = fadd <16 x float> %c, %cast
5377  ret <16 x float> %add
5378}
5379
; Masked bitwise and on the integer bit patterns of two <8 x double> vectors:
; %a and %b are bitcast to <8 x i64> and anded, lanes selected by %mask (i8
; bitcast to <8 x i1>) take the and result while the others take %passThru,
; and the selection is bitcast back to double and added to %c. The assertions
; expect kmovd into %k1, a vandpd masked by %k1 that writes the %passThru
; register (%zmm2), and a vaddpd.
5380define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
5381; GENERIC-LABEL: masked_and_v8f64:
5382; GENERIC:       # %bb.0:
5383; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5384; GENERIC-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
5385; GENERIC-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
5386; GENERIC-NEXT:    retq # sched: [1:1.00]
5387;
5388; SKX-LABEL: masked_and_v8f64:
5389; SKX:       # %bb.0:
5390; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5391; SKX-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
5392; SKX-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
5393; SKX-NEXT:    retq # sched: [7:1.00]
5394  %a1 = bitcast <8 x double> %a to <8 x i64>
5395  %b1 = bitcast <8 x double> %b to <8 x i64>
5396  %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
5397  %mask1 = bitcast i8 %mask to <8 x i1>
5398  %op = and <8 x i64> %a1, %b1
5399  %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1
5400  %cast = bitcast <8 x i64> %select to <8 x double>
5401  %add = fadd <8 x double> %c, %cast
5402  ret <8 x double> %add
5403}
5404
; Masked bitwise or on the integer bit patterns of two <8 x double> vectors:
; %a and %b are bitcast to <8 x i64> and ored, lanes selected by %mask (i8
; bitcast to <8 x i1>) take the or result while the others take %passThru,
; and the selection is bitcast back to double and added to %c.
;
; NOTE(review): the body used `and`, duplicating masked_and_v8f64, so the
; masked or form was not covered. The assertions were adjusted by hand to the
; vorpd form, with scheduling figures taken from the masked zmm vorpd
; assertions in test_mm512_mask_or_pd; regenerate them with
; utils/update_llc_test_checks.py to confirm.
5405define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
5406; GENERIC-LABEL: masked_or_v8f64:
5407; GENERIC:       # %bb.0:
5408; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5409; GENERIC-NEXT:    vorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
5410; GENERIC-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
5411; GENERIC-NEXT:    retq # sched: [1:1.00]
5412;
5413; SKX-LABEL: masked_or_v8f64:
5414; SKX:       # %bb.0:
5415; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5416; SKX-NEXT:    vorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
5417; SKX-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
5418; SKX-NEXT:    retq # sched: [7:1.00]
5419  %a1 = bitcast <8 x double> %a to <8 x i64>
5420  %b1 = bitcast <8 x double> %b to <8 x i64>
5421  %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
5422  %mask1 = bitcast i8 %mask to <8 x i1>
5423  %op = or <8 x i64> %a1, %b1
5424  %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1
5425  %cast = bitcast <8 x i64> %select to <8 x double>
5426  %add = fadd <8 x double> %c, %cast
5427  ret <8 x double> %add
5428}
5429
; Masked bitwise xor on the integer bit patterns of two <8 x double> vectors:
; %a and %b are bitcast to <8 x i64> and xored, lanes selected by %mask (i8
; bitcast to <8 x i1>) take the xor result while the others take %passThru,
; and the selection is bitcast back to double and added to %c.
;
; NOTE(review): the body used `and`, duplicating masked_and_v8f64, so the
; masked xor form was not covered. The assertions were adjusted by hand to the
; vxorpd form, with scheduling figures taken from the masked zmm vxorpd
; assertions in test_mm512_mask_xor_pd; regenerate them with
; utils/update_llc_test_checks.py to confirm.
5430define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
5431; GENERIC-LABEL: masked_xor_v8f64:
5432; GENERIC:       # %bb.0:
5433; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5434; GENERIC-NEXT:    vxorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
5435; GENERIC-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
5436; GENERIC-NEXT:    retq # sched: [1:1.00]
5437;
5438; SKX-LABEL: masked_xor_v8f64:
5439; SKX:       # %bb.0:
5440; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5441; SKX-NEXT:    vxorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
5442; SKX-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
5443; SKX-NEXT:    retq # sched: [7:1.00]
5444  %a1 = bitcast <8 x double> %a to <8 x i64>
5445  %b1 = bitcast <8 x double> %b to <8 x i64>
5446  %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
5447  %mask1 = bitcast i8 %mask to <8 x i1>
5448  %op = xor <8 x i64> %a1, %b1
5449  %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1
5450  %cast = bitcast <8 x i64> %select to <8 x double>
5451  %add = fadd <8 x double> %c, %cast
5452  ret <8 x double> %add
5453}
5454
; Ands %__a and %__b as <8 x i64>, reinterprets the result and %__src as
; <16 x i32>, and selects per 32-bit lane using %__k (i16 bitcast to
; <16 x i1>): set lanes take the and result, clear lanes keep %__src. The
; assertions expect kmovd into %k1 and one vandps masked by %k1 that writes
; %zmm0.
5455define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
5456; GENERIC-LABEL: test_mm512_mask_and_epi32:
5457; GENERIC:       # %bb.0: # %entry
5458; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5459; GENERIC-NEXT:    vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
5460; GENERIC-NEXT:    retq # sched: [1:1.00]
5461;
5462; SKX-LABEL: test_mm512_mask_and_epi32:
5463; SKX:       # %bb.0: # %entry
5464; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5465; SKX-NEXT:    vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
5466; SKX-NEXT:    retq # sched: [7:1.00]
5467entry:
5468  %and1.i.i = and <8 x i64> %__a, %__b
5469  %0 = bitcast <8 x i64> %and1.i.i to <16 x i32>
5470  %1 = bitcast <8 x i64> %__src to <16 x i32>
5471  %2 = bitcast i16 %__k to <16 x i1>
5472  %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
5473  %4 = bitcast <16 x i32> %3 to <8 x i64>
5474  ret <8 x i64> %4
5475}
5476
; Ors %__a and %__b as <8 x i64>, reinterprets the result and %__src as
; <16 x i32>, and selects per 32-bit lane using %__k (i16 bitcast to
; <16 x i1>): set lanes take the or result, clear lanes keep %__src. The
; assertions expect kmovd into %k1 and one vorps masked by %k1 that writes
; %zmm0.
5477define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
5478; GENERIC-LABEL: test_mm512_mask_or_epi32:
5479; GENERIC:       # %bb.0: # %entry
5480; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5481; GENERIC-NEXT:    vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
5482; GENERIC-NEXT:    retq # sched: [1:1.00]
5483;
5484; SKX-LABEL: test_mm512_mask_or_epi32:
5485; SKX:       # %bb.0: # %entry
5486; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5487; SKX-NEXT:    vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
5488; SKX-NEXT:    retq # sched: [7:1.00]
5489entry:
5490  %or1.i.i = or <8 x i64> %__a, %__b
5491  %0 = bitcast <8 x i64> %or1.i.i to <16 x i32>
5492  %1 = bitcast <8 x i64> %__src to <16 x i32>
5493  %2 = bitcast i16 %__k to <16 x i1>
5494  %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
5495  %4 = bitcast <16 x i32> %3 to <8 x i64>
5496  ret <8 x i64> %4
5497}
5498
; Xors %__a and %__b as <8 x i64>, reinterprets the result and %__src as
; <16 x i32>, and selects per 32-bit lane using %__k (i16 bitcast to
; <16 x i1>): set lanes take the xor result, clear lanes keep %__src. The
; assertions expect kmovd into %k1 and one vxorps masked by %k1 that writes
; %zmm0.
5499define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
5500; GENERIC-LABEL: test_mm512_mask_xor_epi32:
5501; GENERIC:       # %bb.0: # %entry
5502; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5503; GENERIC-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
5504; GENERIC-NEXT:    retq # sched: [1:1.00]
5505;
5506; SKX-LABEL: test_mm512_mask_xor_epi32:
5507; SKX:       # %bb.0: # %entry
5508; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5509; SKX-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
5510; SKX-NEXT:    retq # sched: [7:1.00]
5511entry:
5512  %xor1.i.i = xor <8 x i64> %__a, %__b
5513  %0 = bitcast <8 x i64> %xor1.i.i to <16 x i32>
5514  %1 = bitcast <8 x i64> %__src to <16 x i32>
5515  %2 = bitcast i16 %__k to <16 x i1>
5516  %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
5517  %4 = bitcast <16 x i32> %3 to <8 x i64>
5518  ret <8 x i64> %4
5519}
5520
; Xors the bit patterns of %__A and %__B (via <8 x i64> bitcasts) and selects
; per lane using %__U (i8 bitcast to <8 x i1>): set lanes take the xor result,
; clear lanes keep %__W. The assertions expect kmovd into %k1 and one vxorpd
; masked by %k1 that writes %zmm0.
5521define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5522; GENERIC-LABEL: test_mm512_mask_xor_pd:
5523; GENERIC:       # %bb.0: # %entry
5524; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5525; GENERIC-NEXT:    vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
5526; GENERIC-NEXT:    retq # sched: [1:1.00]
5527;
5528; SKX-LABEL: test_mm512_mask_xor_pd:
5529; SKX:       # %bb.0: # %entry
5530; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5531; SKX-NEXT:    vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
5532; SKX-NEXT:    retq # sched: [7:1.00]
5533entry:
5534  %0 = bitcast <8 x double> %__A to <8 x i64>
5535  %1 = bitcast <8 x double> %__B to <8 x i64>
5536  %xor.i.i = xor <8 x i64> %0, %1
5537  %2 = bitcast <8 x i64> %xor.i.i to <8 x double>
5538  %3 = bitcast i8 %__U to <8 x i1>
5539  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
5540  ret <8 x double> %4
5541}
5542
; Xors the bit patterns of %__A and %__B (via <8 x i64> bitcasts) and selects
; per lane using %__U (i8 bitcast to <8 x i1>): set lanes take the xor result,
; clear lanes become zero (zeroinitializer). The assertions expect kmovd into
; %k1 and one vxorpd carrying both the %k1 mask and the {z} modifier.
5543define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5544; GENERIC-LABEL: test_mm512_maskz_xor_pd:
5545; GENERIC:       # %bb.0: # %entry
5546; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5547; GENERIC-NEXT:    vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
5548; GENERIC-NEXT:    retq # sched: [1:1.00]
5549;
5550; SKX-LABEL: test_mm512_maskz_xor_pd:
5551; SKX:       # %bb.0: # %entry
5552; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5553; SKX-NEXT:    vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
5554; SKX-NEXT:    retq # sched: [7:1.00]
5555entry:
5556  %0 = bitcast <8 x double> %__A to <8 x i64>
5557  %1 = bitcast <8 x double> %__B to <8 x i64>
5558  %xor.i.i = xor <8 x i64> %0, %1
5559  %2 = bitcast <8 x i64> %xor.i.i to <8 x double>
5560  %3 = bitcast i8 %__U to <8 x i1>
5561  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
5562  ret <8 x double> %4
5563}
5564
; Xors the bit patterns of %__A and %__B (via <16 x i32> bitcasts) and selects
; per lane using %__U (i16 bitcast to <16 x i1>): set lanes take the xor
; result, clear lanes keep %__W. The assertions expect kmovd into %k1 and one
; vxorps masked by %k1 that writes %zmm0.
5565define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5566; GENERIC-LABEL: test_mm512_mask_xor_ps:
5567; GENERIC:       # %bb.0: # %entry
5568; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5569; GENERIC-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
5570; GENERIC-NEXT:    retq # sched: [1:1.00]
5571;
5572; SKX-LABEL: test_mm512_mask_xor_ps:
5573; SKX:       # %bb.0: # %entry
5574; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5575; SKX-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
5576; SKX-NEXT:    retq # sched: [7:1.00]
5577entry:
5578  %0 = bitcast <16 x float> %__A to <16 x i32>
5579  %1 = bitcast <16 x float> %__B to <16 x i32>
5580  %xor.i.i = xor <16 x i32> %0, %1
5581  %2 = bitcast <16 x i32> %xor.i.i to <16 x float>
5582  %3 = bitcast i16 %__U to <16 x i1>
5583  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
5584  ret <16 x float> %4
5585}
5586
; Xors the bit patterns of %__A and %__B (via <16 x i32> bitcasts) and selects
; per lane using %__U (i16 bitcast to <16 x i1>): set lanes take the xor
; result, clear lanes become zero (zeroinitializer). The assertions expect
; kmovd into %k1 and one vxorps carrying both the %k1 mask and the {z}
; modifier.
5587define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5588; GENERIC-LABEL: test_mm512_maskz_xor_ps:
5589; GENERIC:       # %bb.0: # %entry
5590; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5591; GENERIC-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
5592; GENERIC-NEXT:    retq # sched: [1:1.00]
5593;
5594; SKX-LABEL: test_mm512_maskz_xor_ps:
5595; SKX:       # %bb.0: # %entry
5596; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5597; SKX-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
5598; SKX-NEXT:    retq # sched: [7:1.00]
5599entry:
5600  %0 = bitcast <16 x float> %__A to <16 x i32>
5601  %1 = bitcast <16 x float> %__B to <16 x i32>
5602  %xor.i.i = xor <16 x i32> %0, %1
5603  %2 = bitcast <16 x i32> %xor.i.i to <16 x float>
5604  %3 = bitcast i16 %__U to <16 x i1>
5605  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
5606  ret <16 x float> %4
5607}
5608
; Ors the bit patterns of %__B and %__A (via <8 x i64> bitcasts; note the
; operand order %1, %0) and selects per lane using %__U (i8 bitcast to
; <8 x i1>): set lanes take the or result, clear lanes keep %__W. The
; assertions expect kmovd into %k1 and one vorpd masked by %k1 that writes
; %zmm0.
5609define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5610; GENERIC-LABEL: test_mm512_mask_or_pd:
5611; GENERIC:       # %bb.0: # %entry
5612; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5613; GENERIC-NEXT:    vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
5614; GENERIC-NEXT:    retq # sched: [1:1.00]
5615;
5616; SKX-LABEL: test_mm512_mask_or_pd:
5617; SKX:       # %bb.0: # %entry
5618; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5619; SKX-NEXT:    vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
5620; SKX-NEXT:    retq # sched: [7:1.00]
5621entry:
5622  %0 = bitcast <8 x double> %__A to <8 x i64>
5623  %1 = bitcast <8 x double> %__B to <8 x i64>
5624  %or.i.i = or <8 x i64> %1, %0
5625  %2 = bitcast <8 x i64> %or.i.i to <8 x double>
5626  %3 = bitcast i8 %__U to <8 x i1>
5627  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
5628  ret <8 x double> %4
5629}
5630
; Ors the bit patterns of %__B and %__A (via <8 x i64> bitcasts; note the
; operand order %1, %0) and selects per lane using %__U (i8 bitcast to
; <8 x i1>): set lanes take the or result, clear lanes become zero
; (zeroinitializer). The assertions expect kmovd into %k1 and one vorpd
; carrying both the %k1 mask and the {z} modifier.
5631define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5632; GENERIC-LABEL: test_mm512_maskz_or_pd:
5633; GENERIC:       # %bb.0: # %entry
5634; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5635; GENERIC-NEXT:    vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
5636; GENERIC-NEXT:    retq # sched: [1:1.00]
5637;
5638; SKX-LABEL: test_mm512_maskz_or_pd:
5639; SKX:       # %bb.0: # %entry
5640; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5641; SKX-NEXT:    vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
5642; SKX-NEXT:    retq # sched: [7:1.00]
5643entry:
5644  %0 = bitcast <8 x double> %__A to <8 x i64>
5645  %1 = bitcast <8 x double> %__B to <8 x i64>
5646  %or.i.i = or <8 x i64> %1, %0
5647  %2 = bitcast <8 x i64> %or.i.i to <8 x double>
5648  %3 = bitcast i8 %__U to <8 x i1>
5649  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
5650  ret <8 x double> %4
5651}
5652
; Masked bitwise OR on <16 x float>: %__A and %__B are ORed as <16 x i32>, then
; each lane takes the OR result when its bit in %__U is set and keeps the
; corresponding %__W lane otherwise. The checks expect one vorps under {%k1}.
5653define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5654; GENERIC-LABEL: test_mm512_mask_or_ps:
5655; GENERIC:       # %bb.0: # %entry
5656; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5657; GENERIC-NEXT:    vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
5658; GENERIC-NEXT:    retq # sched: [1:1.00]
5659;
5660; SKX-LABEL: test_mm512_mask_or_ps:
5661; SKX:       # %bb.0: # %entry
5662; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5663; SKX-NEXT:    vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
5664; SKX-NEXT:    retq # sched: [7:1.00]
5665entry:
5666  %0 = bitcast <16 x float> %__A to <16 x i32>
5667  %1 = bitcast <16 x float> %__B to <16 x i32>
5668  %or.i.i = or <16 x i32> %1, %0
5669  %2 = bitcast <16 x i32> %or.i.i to <16 x float>
5670  %3 = bitcast i16 %__U to <16 x i1>
5671  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
5672  ret <16 x float> %4
5673}
5674
; Zeroing-masked bitwise OR on <16 x float>: %__A and %__B are ORed as
; <16 x i32>, then each lane takes the OR result when its bit in %__U is set
; and becomes zero otherwise. The checks expect one vorps under {%k1} {z}.
5675define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5676; GENERIC-LABEL: test_mm512_maskz_or_ps:
5677; GENERIC:       # %bb.0: # %entry
5678; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5679; GENERIC-NEXT:    vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
5680; GENERIC-NEXT:    retq # sched: [1:1.00]
5681;
5682; SKX-LABEL: test_mm512_maskz_or_ps:
5683; SKX:       # %bb.0: # %entry
5684; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5685; SKX-NEXT:    vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
5686; SKX-NEXT:    retq # sched: [7:1.00]
5687entry:
5688  %0 = bitcast <16 x float> %__A to <16 x i32>
5689  %1 = bitcast <16 x float> %__B to <16 x i32>
5690  %or.i.i = or <16 x i32> %1, %0
5691  %2 = bitcast <16 x i32> %or.i.i to <16 x float>
5692  %3 = bitcast i16 %__U to <16 x i1>
5693  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
5694  ret <16 x float> %4
5695}
5696
; Masked bitwise AND on <8 x double>: %__A and %__B are ANDed as <8 x i64>,
; then each lane takes the AND result when its bit in %__U is set and keeps the
; corresponding %__W lane otherwise. The checks expect one vandpd under {%k1}.
5697define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5698; GENERIC-LABEL: test_mm512_mask_and_pd:
5699; GENERIC:       # %bb.0: # %entry
5700; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5701; GENERIC-NEXT:    vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
5702; GENERIC-NEXT:    retq # sched: [1:1.00]
5703;
5704; SKX-LABEL: test_mm512_mask_and_pd:
5705; SKX:       # %bb.0: # %entry
5706; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5707; SKX-NEXT:    vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
5708; SKX-NEXT:    retq # sched: [7:1.00]
5709entry:
5710  %0 = bitcast <8 x double> %__A to <8 x i64>
5711  %1 = bitcast <8 x double> %__B to <8 x i64>
5712  %and.i.i = and <8 x i64> %1, %0
5713  %2 = bitcast <8 x i64> %and.i.i to <8 x double>
5714  %3 = bitcast i8 %__U to <8 x i1>
5715  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
5716  ret <8 x double> %4
5717}
5718
; Zeroing-masked bitwise AND on <8 x double>: %__A and %__B are ANDed as
; <8 x i64>, then each lane takes the AND result when its bit in %__U is set
; and becomes zero otherwise. The checks expect one vandpd under {%k1} {z}.
5719define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5720; GENERIC-LABEL: test_mm512_maskz_and_pd:
5721; GENERIC:       # %bb.0: # %entry
5722; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5723; GENERIC-NEXT:    vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
5724; GENERIC-NEXT:    retq # sched: [1:1.00]
5725;
5726; SKX-LABEL: test_mm512_maskz_and_pd:
5727; SKX:       # %bb.0: # %entry
5728; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5729; SKX-NEXT:    vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
5730; SKX-NEXT:    retq # sched: [7:1.00]
5731entry:
5732  %0 = bitcast <8 x double> %__A to <8 x i64>
5733  %1 = bitcast <8 x double> %__B to <8 x i64>
5734  %and.i.i = and <8 x i64> %1, %0
5735  %2 = bitcast <8 x i64> %and.i.i to <8 x double>
5736  %3 = bitcast i8 %__U to <8 x i1>
5737  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
5738  ret <8 x double> %4
5739}
5740
; Masked bitwise AND on <16 x float>: %__A and %__B are ANDed as <16 x i32>,
; then each lane takes the AND result when its bit in %__U is set and keeps the
; corresponding %__W lane otherwise. The checks expect one vandps under {%k1}.
5741define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5742; GENERIC-LABEL: test_mm512_mask_and_ps:
5743; GENERIC:       # %bb.0: # %entry
5744; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5745; GENERIC-NEXT:    vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
5746; GENERIC-NEXT:    retq # sched: [1:1.00]
5747;
5748; SKX-LABEL: test_mm512_mask_and_ps:
5749; SKX:       # %bb.0: # %entry
5750; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5751; SKX-NEXT:    vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
5752; SKX-NEXT:    retq # sched: [7:1.00]
5753entry:
5754  %0 = bitcast <16 x float> %__A to <16 x i32>
5755  %1 = bitcast <16 x float> %__B to <16 x i32>
5756  %and.i.i = and <16 x i32> %1, %0
5757  %2 = bitcast <16 x i32> %and.i.i to <16 x float>
5758  %3 = bitcast i16 %__U to <16 x i1>
5759  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
5760  ret <16 x float> %4
5761}
5762
; Zeroing-masked bitwise AND on <16 x float>: %__A and %__B are ANDed as
; <16 x i32>, then each lane takes the AND result when its bit in %__U is set
; and becomes zero otherwise. The checks expect one vandps under {%k1} {z}.
5763define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5764; GENERIC-LABEL: test_mm512_maskz_and_ps:
5765; GENERIC:       # %bb.0: # %entry
5766; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5767; GENERIC-NEXT:    vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
5768; GENERIC-NEXT:    retq # sched: [1:1.00]
5769;
5770; SKX-LABEL: test_mm512_maskz_and_ps:
5771; SKX:       # %bb.0: # %entry
5772; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5773; SKX-NEXT:    vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
5774; SKX-NEXT:    retq # sched: [7:1.00]
5775entry:
5776  %0 = bitcast <16 x float> %__A to <16 x i32>
5777  %1 = bitcast <16 x float> %__B to <16 x i32>
5778  %and.i.i = and <16 x i32> %1, %0
5779  %2 = bitcast <16 x i32> %and.i.i to <16 x float>
5780  %3 = bitcast i16 %__U to <16 x i1>
5781  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
5782  ret <16 x float> %4
5783}
5784
; Masked AND-NOT on <8 x double>: computes (~%__A) & %__B on the <8 x i64> bit
; patterns (the NOT is written as xor with all-ones), then each lane takes that
; result when its bit in %__U is set and keeps the %__W lane otherwise.
; The checks expect the xor/and pair to become one vandnpd under {%k1}.
5785define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5786; GENERIC-LABEL: test_mm512_mask_andnot_pd:
5787; GENERIC:       # %bb.0: # %entry
5788; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5789; GENERIC-NEXT:    vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
5790; GENERIC-NEXT:    retq # sched: [1:1.00]
5791;
5792; SKX-LABEL: test_mm512_mask_andnot_pd:
5793; SKX:       # %bb.0: # %entry
5794; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5795; SKX-NEXT:    vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
5796; SKX-NEXT:    retq # sched: [7:1.00]
5797entry:
5798  %0 = bitcast <8 x double> %__A to <8 x i64>
5799  %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
5800  %1 = bitcast <8 x double> %__B to <8 x i64>
5801  %and.i.i = and <8 x i64> %1, %neg.i.i
5802  %2 = bitcast <8 x i64> %and.i.i to <8 x double>
5803  %3 = bitcast i8 %__U to <8 x i1>
5804  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
5805  ret <8 x double> %4
5806}
5807
; Zeroing-masked AND-NOT on <8 x double>: computes (~%__A) & %__B on the
; <8 x i64> bit patterns (the NOT is written as xor with all-ones), then each
; lane takes that result when its bit in %__U is set and becomes zero otherwise.
; The checks expect the xor/and pair to become one vandnpd under {%k1} {z}.
5808define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5809; GENERIC-LABEL: test_mm512_maskz_andnot_pd:
5810; GENERIC:       # %bb.0: # %entry
5811; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5812; GENERIC-NEXT:    vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
5813; GENERIC-NEXT:    retq # sched: [1:1.00]
5814;
5815; SKX-LABEL: test_mm512_maskz_andnot_pd:
5816; SKX:       # %bb.0: # %entry
5817; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5818; SKX-NEXT:    vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
5819; SKX-NEXT:    retq # sched: [7:1.00]
5820entry:
5821  %0 = bitcast <8 x double> %__A to <8 x i64>
5822  %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
5823  %1 = bitcast <8 x double> %__B to <8 x i64>
5824  %and.i.i = and <8 x i64> %1, %neg.i.i
5825  %2 = bitcast <8 x i64> %and.i.i to <8 x double>
5826  %3 = bitcast i8 %__U to <8 x i1>
5827  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
5828  ret <8 x double> %4
5829}
5830
; Masked AND-NOT on <16 x float>: computes (~%__A) & %__B on the <16 x i32> bit
; patterns (the NOT is written as xor with all-ones), then each lane takes that
; result when its bit in %__U is set and keeps the %__W lane otherwise.
; The checks expect the xor/and pair to become one vandnps under {%k1}.
5831define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5832; GENERIC-LABEL: test_mm512_mask_andnot_ps:
5833; GENERIC:       # %bb.0: # %entry
5834; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5835; GENERIC-NEXT:    vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
5836; GENERIC-NEXT:    retq # sched: [1:1.00]
5837;
5838; SKX-LABEL: test_mm512_mask_andnot_ps:
5839; SKX:       # %bb.0: # %entry
5840; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5841; SKX-NEXT:    vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
5842; SKX-NEXT:    retq # sched: [7:1.00]
5843entry:
5844  %0 = bitcast <16 x float> %__A to <16 x i32>
5845  %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
5846  %1 = bitcast <16 x float> %__B to <16 x i32>
5847  %and.i.i = and <16 x i32> %1, %neg.i.i
5848  %2 = bitcast <16 x i32> %and.i.i to <16 x float>
5849  %3 = bitcast i16 %__U to <16 x i1>
5850  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
5851  ret <16 x float> %4
5852}
5853
; Zeroing-masked AND-NOT on <16 x float>: computes (~%__A) & %__B on the
; <16 x i32> bit patterns (the NOT is written as xor with all-ones), then each
; lane takes that result when its bit in %__U is set and becomes zero otherwise.
; The checks expect the xor/and pair to become one vandnps under {%k1} {z}.
5854define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5855; GENERIC-LABEL: test_mm512_maskz_andnot_ps:
5856; GENERIC:       # %bb.0: # %entry
5857; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
5858; GENERIC-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
5859; GENERIC-NEXT:    retq # sched: [1:1.00]
5860;
5861; SKX-LABEL: test_mm512_maskz_andnot_ps:
5862; SKX:       # %bb.0: # %entry
5863; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
5864; SKX-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
5865; SKX-NEXT:    retq # sched: [7:1.00]
5866entry:
5867  %0 = bitcast <16 x float> %__A to <16 x i32>
5868  %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
5869  %1 = bitcast <16 x float> %__B to <16 x i32>
5870  %and.i.i = and <16 x i32> %1, %neg.i.i
5871  %2 = bitcast <16 x i32> %and.i.i to <16 x float>
5872  %3 = bitcast i16 %__U to <16 x i1>
5873  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
5874  ret <16 x float> %4
5875}
5876
; Reinterprets a float argument as i32 via bitcast.
; The checks expect a single vmovd from %xmm0 to %eax.
5877define i32 @mov_test1(float %x) {
5878; GENERIC-LABEL: mov_test1:
5879; GENERIC:       # %bb.0:
5880; GENERIC-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
5881; GENERIC-NEXT:    retq # sched: [1:1.00]
5882;
5883; SKX-LABEL: mov_test1:
5884; SKX:       # %bb.0:
5885; SKX-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
5886; SKX-NEXT:    retq # sched: [7:1.00]
5887   %res = bitcast float %x to i32
5888   ret i32 %res
5889}
5890
; Inserts an i32 argument into lane 0 of an undef <4 x i32>.
; The checks expect a single vmovd from %edi to %xmm0.
5891define <4 x i32> @mov_test2(i32 %x) {
5892; GENERIC-LABEL: mov_test2:
5893; GENERIC:       # %bb.0:
5894; GENERIC-NEXT:    vmovd %edi, %xmm0 # sched: [1:1.00]
5895; GENERIC-NEXT:    retq # sched: [1:1.00]
5896;
5897; SKX-LABEL: mov_test2:
5898; SKX:       # %bb.0:
5899; SKX-NEXT:    vmovd %edi, %xmm0 # sched: [1:1.00]
5900; SKX-NEXT:    retq # sched: [7:1.00]
5901   %res = insertelement <4 x i32>undef, i32 %x, i32 0
5902   ret <4 x i32>%res
5903}
5904
; Inserts an i64 argument into lane 0 of an undef <2 x i64>.
; The checks expect a single vmovq from %rdi to %xmm0.
5905define <2 x i64> @mov_test3(i64 %x) {
5906; GENERIC-LABEL: mov_test3:
5907; GENERIC:       # %bb.0:
5908; GENERIC-NEXT:    vmovq %rdi, %xmm0 # sched: [1:1.00]
5909; GENERIC-NEXT:    retq # sched: [1:1.00]
5910;
5911; SKX-LABEL: mov_test3:
5912; SKX:       # %bb.0:
5913; SKX-NEXT:    vmovq %rdi, %xmm0 # sched: [1:1.00]
5914; SKX-NEXT:    retq # sched: [7:1.00]
5915   %res = insertelement <2 x i64>undef, i64 %x, i32 0
5916   ret <2 x i64>%res
5917}
5918
; Loads an i32 from %x (no explicit alignment) and inserts it into lane 0 of an
; undef <4 x i32>. The checks expect the load to be emitted as a vmovss whose
; upper three lanes are zero.
5919define <4 x i32> @mov_test4(i32* %x) {
5920; GENERIC-LABEL: mov_test4:
5921; GENERIC:       # %bb.0:
5922; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
5923; GENERIC-NEXT:    retq # sched: [1:1.00]
5924;
5925; SKX-LABEL: mov_test4:
5926; SKX:       # %bb.0:
5927; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
5928; SKX-NEXT:    retq # sched: [7:1.00]
5929   %y = load i32, i32* %x
5930   %res = insertelement <4 x i32>undef, i32 %y, i32 0
5931   ret <4 x i32>%res
5932}
5933
; Stores a float argument to %y with 4-byte alignment.
; The checks expect a single vmovss store of %xmm0 to (%rdi).
5934define void @mov_test5(float %x, float* %y) {
5935; GENERIC-LABEL: mov_test5:
5936; GENERIC:       # %bb.0:
5937; GENERIC-NEXT:    vmovss %xmm0, (%rdi) # sched: [1:1.00]
5938; GENERIC-NEXT:    retq # sched: [1:1.00]
5939;
5940; SKX-LABEL: mov_test5:
5941; SKX:       # %bb.0:
5942; SKX-NEXT:    vmovss %xmm0, (%rdi) # sched: [1:1.00]
5943; SKX-NEXT:    retq # sched: [7:1.00]
5944   store float %x, float* %y, align 4
5945   ret void
5946}
5947
; Stores a double argument to %y with 8-byte alignment.
; The checks expect a single vmovsd store of %xmm0 to (%rdi).
5948define void @mov_test6(double %x, double* %y) {
5949; GENERIC-LABEL: mov_test6:
5950; GENERIC:       # %bb.0:
5951; GENERIC-NEXT:    vmovsd %xmm0, (%rdi) # sched: [1:1.00]
5952; GENERIC-NEXT:    retq # sched: [1:1.00]
5953;
5954; SKX-LABEL: mov_test6:
5955; SKX:       # %bb.0:
5956; SKX-NEXT:    vmovsd %xmm0, (%rdi) # sched: [1:1.00]
5957; SKX-NEXT:    retq # sched: [7:1.00]
5958   store double %x, double* %y, align 8
5959   ret void
5960}
5961
; Loads an i32 from %x and returns its bits reinterpreted as float.
; The checks expect the load to be emitted directly as a vmovss into %xmm0.
5962define float @mov_test7(i32* %x) {
5963; GENERIC-LABEL: mov_test7:
5964; GENERIC:       # %bb.0:
5965; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
5966; GENERIC-NEXT:    retq # sched: [1:1.00]
5967;
5968; SKX-LABEL: mov_test7:
5969; SKX:       # %bb.0:
5970; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
5971; SKX-NEXT:    retq # sched: [7:1.00]
5972   %y = load i32, i32* %x
5973   %res = bitcast i32 %y to float
5974   ret float %res
5975}
5976
; Extracts lane 0 of a <4 x i32> argument.
; The checks expect a single vmovd from %xmm0 to %eax.
5977define i32 @mov_test8(<4 x i32> %x) {
5978; GENERIC-LABEL: mov_test8:
5979; GENERIC:       # %bb.0:
5980; GENERIC-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
5981; GENERIC-NEXT:    retq # sched: [1:1.00]
5982;
5983; SKX-LABEL: mov_test8:
5984; SKX:       # %bb.0:
5985; SKX-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
5986; SKX-NEXT:    retq # sched: [7:1.00]
5987   %res = extractelement <4 x i32> %x, i32 0
5988   ret i32 %res
5989}
5990
; Extracts lane 0 of a <2 x i64> argument.
; The checks expect a single vmovq from %xmm0 to %rax.
5991define i64 @mov_test9(<2 x i64> %x) {
5992; GENERIC-LABEL: mov_test9:
5993; GENERIC:       # %bb.0:
5994; GENERIC-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
5995; GENERIC-NEXT:    retq # sched: [1:1.00]
5996;
5997; SKX-LABEL: mov_test9:
5998; SKX:       # %bb.0:
5999; SKX-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
6000; SKX-NEXT:    retq # sched: [7:1.00]
6001   %res = extractelement <2 x i64> %x, i32 0
6002   ret i64 %res
6003}
6004
; Loads an i32 from %x (align 4) and inserts it into lane 0 of an all-zero
; <4 x i32>. The checks expect one vmovss load that zeroes the upper lanes.
6005define <4 x i32> @mov_test10(i32* %x) {
6006; GENERIC-LABEL: mov_test10:
6007; GENERIC:       # %bb.0:
6008; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
6009; GENERIC-NEXT:    retq # sched: [1:1.00]
6010;
6011; SKX-LABEL: mov_test10:
6012; SKX:       # %bb.0:
6013; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
6014; SKX-NEXT:    retq # sched: [7:1.00]
6015   %y = load i32, i32* %x, align 4
6016   %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
6017   ret <4 x i32>%res
6018}
6019
; Loads a float from %x (align 4) and inserts it into lane 0 of an all-zero
; <4 x float>. The checks expect one vmovss load that zeroes the upper lanes.
6020define <4 x float> @mov_test11(float* %x) {
6021; GENERIC-LABEL: mov_test11:
6022; GENERIC:       # %bb.0:
6023; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
6024; GENERIC-NEXT:    retq # sched: [1:1.00]
6025;
6026; SKX-LABEL: mov_test11:
6027; SKX:       # %bb.0:
6028; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
6029; SKX-NEXT:    retq # sched: [7:1.00]
6030   %y = load float, float* %x, align 4
6031   %res = insertelement <4 x float>zeroinitializer, float %y, i32 0
6032   ret <4 x float>%res
6033}
6034
; Loads a double from %x (align 8) and inserts it into lane 0 of an all-zero
; <2 x double>. The checks expect one vmovsd load that zeroes the upper lane.
6035define <2 x double> @mov_test12(double* %x) {
6036; GENERIC-LABEL: mov_test12:
6037; GENERIC:       # %bb.0:
6038; GENERIC-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
6039; GENERIC-NEXT:    retq # sched: [1:1.00]
6040;
6041; SKX-LABEL: mov_test12:
6042; SKX:       # %bb.0:
6043; SKX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
6044; SKX-NEXT:    retq # sched: [7:1.00]
6045   %y = load double, double* %x, align 8
6046   %res = insertelement <2 x double>zeroinitializer, double %y, i32 0
6047   ret <2 x double>%res
6048}
6049
; Inserts an i64 argument into lane 0 of an all-zero <2 x i64>.
; The checks expect a single vmovq from %rdi to %xmm0.
6050define <2 x i64> @mov_test13(i64 %x) {
6051; GENERIC-LABEL: mov_test13:
6052; GENERIC:       # %bb.0:
6053; GENERIC-NEXT:    vmovq %rdi, %xmm0 # sched: [1:1.00]
6054; GENERIC-NEXT:    retq # sched: [1:1.00]
6055;
6056; SKX-LABEL: mov_test13:
6057; SKX:       # %bb.0:
6058; SKX-NEXT:    vmovq %rdi, %xmm0 # sched: [1:1.00]
6059; SKX-NEXT:    retq # sched: [7:1.00]
6060   %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0
6061   ret <2 x i64>%res
6062}
6063
; Inserts an i32 argument into lane 0 of an all-zero <4 x i32>.
; The checks expect a single vmovd from %edi to %xmm0.
6064define <4 x i32> @mov_test14(i32 %x) {
6065; GENERIC-LABEL: mov_test14:
6066; GENERIC:       # %bb.0:
6067; GENERIC-NEXT:    vmovd %edi, %xmm0 # sched: [1:1.00]
6068; GENERIC-NEXT:    retq # sched: [1:1.00]
6069;
6070; SKX-LABEL: mov_test14:
6071; SKX:       # %bb.0:
6072; SKX-NEXT:    vmovd %edi, %xmm0 # sched: [1:1.00]
6073; SKX-NEXT:    retq # sched: [7:1.00]
6074   %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
6075   ret <4 x i32>%res
6076}
6077
; Loads an i32 from %x (align 4) and inserts it into lane 0 of an all-zero
; <4 x i32>. The checks expect one vmovss load that zeroes the upper lanes.
; NOTE(review): the IR body is identical to mov_test10 -- confirm whether the
; duplication is intentional.
6078define <4 x i32> @mov_test15(i32* %x) {
6079; GENERIC-LABEL: mov_test15:
6080; GENERIC:       # %bb.0:
6081; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
6082; GENERIC-NEXT:    retq # sched: [1:1.00]
6083;
6084; SKX-LABEL: mov_test15:
6085; SKX:       # %bb.0:
6086; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
6087; SKX-NEXT:    retq # sched: [7:1.00]
6088   %y = load i32, i32* %x, align 4
6089   %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
6090   ret <4 x i32>%res
6091}
6092
; Loads a <16 x i32> from %addr with align 1.
; The checks expect the unaligned form, vmovups, into %zmm0.
6093define <16 x i32> @mov_test16(i8 * %addr) {
6094; GENERIC-LABEL: mov_test16:
6095; GENERIC:       # %bb.0:
6096; GENERIC-NEXT:    vmovups (%rdi), %zmm0 # sched: [7:0.50]
6097; GENERIC-NEXT:    retq # sched: [1:1.00]
6098;
6099; SKX-LABEL: mov_test16:
6100; SKX:       # %bb.0:
6101; SKX-NEXT:    vmovups (%rdi), %zmm0 # sched: [8:0.50]
6102; SKX-NEXT:    retq # sched: [7:1.00]
6103  %vaddr = bitcast i8* %addr to <16 x i32>*
6104  %res = load <16 x i32>, <16 x i32>* %vaddr, align 1
6105  ret <16 x i32>%res
6106}
6107
; Loads a <16 x i32> from %addr with align 64.
; The checks expect the aligned form, vmovaps, into %zmm0.
6108define <16 x i32> @mov_test17(i8 * %addr) {
6109; GENERIC-LABEL: mov_test17:
6110; GENERIC:       # %bb.0:
6111; GENERIC-NEXT:    vmovaps (%rdi), %zmm0 # sched: [7:0.50]
6112; GENERIC-NEXT:    retq # sched: [1:1.00]
6113;
6114; SKX-LABEL: mov_test17:
6115; SKX:       # %bb.0:
6116; SKX-NEXT:    vmovaps (%rdi), %zmm0 # sched: [8:0.50]
6117; SKX-NEXT:    retq # sched: [7:1.00]
6118  %vaddr = bitcast i8* %addr to <16 x i32>*
6119  %res = load <16 x i32>, <16 x i32>* %vaddr, align 64
6120  ret <16 x i32>%res
6121}
6122
; Stores a <8 x i64> argument to %addr with align 64.
; The checks expect the aligned form, vmovaps, followed by vzeroupper before
; the return.
6123define void @mov_test18(i8 * %addr, <8 x i64> %data) {
6124; GENERIC-LABEL: mov_test18:
6125; GENERIC:       # %bb.0:
6126; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6127; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
6128; GENERIC-NEXT:    retq # sched: [1:1.00]
6129;
6130; SKX-LABEL: mov_test18:
6131; SKX:       # %bb.0:
6132; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6133; SKX-NEXT:    vzeroupper # sched: [4:1.00]
6134; SKX-NEXT:    retq # sched: [7:1.00]
6135  %vaddr = bitcast i8* %addr to <8 x i64>*
6136  store <8 x i64>%data, <8 x i64>* %vaddr, align 64
6137  ret void
6138}
6139
; Stores a <16 x i32> argument to %addr with align 1.
; The checks expect the unaligned form, vmovups, followed by vzeroupper before
; the return.
6140define void @mov_test19(i8 * %addr, <16 x i32> %data) {
6141; GENERIC-LABEL: mov_test19:
6142; GENERIC:       # %bb.0:
6143; GENERIC-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
6144; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
6145; GENERIC-NEXT:    retq # sched: [1:1.00]
6146;
6147; SKX-LABEL: mov_test19:
6148; SKX:       # %bb.0:
6149; SKX-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
6150; SKX-NEXT:    vzeroupper # sched: [4:1.00]
6151; SKX-NEXT:    retq # sched: [7:1.00]
6152  %vaddr = bitcast i8* %addr to <16 x i32>*
6153  store <16 x i32>%data, <16 x i32>* %vaddr, align 1
6154  ret void
6155}
6156
; Stores a <16 x i32> argument to %addr with align 64.
; The checks expect the aligned form, vmovaps, followed by vzeroupper before
; the return.
6157define void @mov_test20(i8 * %addr, <16 x i32> %data) {
6158; GENERIC-LABEL: mov_test20:
6159; GENERIC:       # %bb.0:
6160; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6161; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
6162; GENERIC-NEXT:    retq # sched: [1:1.00]
6163;
6164; SKX-LABEL: mov_test20:
6165; SKX:       # %bb.0:
6166; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6167; SKX-NEXT:    vzeroupper # sched: [4:1.00]
6168; SKX-NEXT:    retq # sched: [7:1.00]
6169  %vaddr = bitcast i8* %addr to <16 x i32>*
6170  store <16 x i32>%data, <16 x i32>* %vaddr, align 64
6171  ret void
6172}
6173
; Loads a <8 x i64> from %addr with align 64.
; The checks expect the aligned form, vmovaps, into %zmm0.
6174define  <8 x i64> @mov_test21(i8 * %addr) {
6175; GENERIC-LABEL: mov_test21:
6176; GENERIC:       # %bb.0:
6177; GENERIC-NEXT:    vmovaps (%rdi), %zmm0 # sched: [7:0.50]
6178; GENERIC-NEXT:    retq # sched: [1:1.00]
6179;
6180; SKX-LABEL: mov_test21:
6181; SKX:       # %bb.0:
6182; SKX-NEXT:    vmovaps (%rdi), %zmm0 # sched: [8:0.50]
6183; SKX-NEXT:    retq # sched: [7:1.00]
6184  %vaddr = bitcast i8* %addr to <8 x i64>*
6185  %res = load <8 x i64>, <8 x i64>* %vaddr, align 64
6186  ret <8 x i64>%res
6187}
6188
; Stores a <8 x i64> argument to %addr with align 1.
; The checks expect the unaligned form, vmovups, followed by vzeroupper before
; the return.
6189define void @mov_test22(i8 * %addr, <8 x i64> %data) {
6190; GENERIC-LABEL: mov_test22:
6191; GENERIC:       # %bb.0:
6192; GENERIC-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
6193; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
6194; GENERIC-NEXT:    retq # sched: [1:1.00]
6195;
6196; SKX-LABEL: mov_test22:
6197; SKX:       # %bb.0:
6198; SKX-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
6199; SKX-NEXT:    vzeroupper # sched: [4:1.00]
6200; SKX-NEXT:    retq # sched: [7:1.00]
6201  %vaddr = bitcast i8* %addr to <8 x i64>*
6202  store <8 x i64>%data, <8 x i64>* %vaddr, align 1
6203  ret void
6204}
6205
; Loads a <8 x i64> from %addr with align 1.
; The checks expect the unaligned form, vmovups, into %zmm0.
6206define <8 x i64> @mov_test23(i8 * %addr) {
6207; GENERIC-LABEL: mov_test23:
6208; GENERIC:       # %bb.0:
6209; GENERIC-NEXT:    vmovups (%rdi), %zmm0 # sched: [7:0.50]
6210; GENERIC-NEXT:    retq # sched: [1:1.00]
6211;
6212; SKX-LABEL: mov_test23:
6213; SKX:       # %bb.0:
6214; SKX-NEXT:    vmovups (%rdi), %zmm0 # sched: [8:0.50]
6215; SKX-NEXT:    retq # sched: [7:1.00]
6216  %vaddr = bitcast i8* %addr to <8 x i64>*
6217  %res = load <8 x i64>, <8 x i64>* %vaddr, align 1
6218  ret <8 x i64>%res
6219}
6220
; Stores a <8 x double> argument to %addr with align 64.
; The checks expect the aligned form, vmovaps, followed by vzeroupper before
; the return.
6221define void @mov_test24(i8 * %addr, <8 x double> %data) {
6222; GENERIC-LABEL: mov_test24:
6223; GENERIC:       # %bb.0:
6224; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6225; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
6226; GENERIC-NEXT:    retq # sched: [1:1.00]
6227;
6228; SKX-LABEL: mov_test24:
6229; SKX:       # %bb.0:
6230; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6231; SKX-NEXT:    vzeroupper # sched: [4:1.00]
6232; SKX-NEXT:    retq # sched: [7:1.00]
6233  %vaddr = bitcast i8* %addr to <8 x double>*
6234  store <8 x double>%data, <8 x double>* %vaddr, align 64
6235  ret void
6236}
6237
; Loads a <8 x double> from %addr with align 64.
; The checks expect the aligned form, vmovaps, into %zmm0.
6238define <8 x double> @mov_test25(i8 * %addr) {
6239; GENERIC-LABEL: mov_test25:
6240; GENERIC:       # %bb.0:
6241; GENERIC-NEXT:    vmovaps (%rdi), %zmm0 # sched: [7:0.50]
6242; GENERIC-NEXT:    retq # sched: [1:1.00]
6243;
6244; SKX-LABEL: mov_test25:
6245; SKX:       # %bb.0:
6246; SKX-NEXT:    vmovaps (%rdi), %zmm0 # sched: [8:0.50]
6247; SKX-NEXT:    retq # sched: [7:1.00]
6248  %vaddr = bitcast i8* %addr to <8 x double>*
6249  %res = load <8 x double>, <8 x double>* %vaddr, align 64
6250  ret <8 x double>%res
6251}
6252
; Stores a <16 x float> argument to %addr with align 64.
; The checks expect the aligned form, vmovaps, followed by vzeroupper before
; the return.
6253define void @mov_test26(i8 * %addr, <16 x float> %data) {
6254; GENERIC-LABEL: mov_test26:
6255; GENERIC:       # %bb.0:
6256; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6257; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
6258; GENERIC-NEXT:    retq # sched: [1:1.00]
6259;
6260; SKX-LABEL: mov_test26:
6261; SKX:       # %bb.0:
6262; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6263; SKX-NEXT:    vzeroupper # sched: [4:1.00]
6264; SKX-NEXT:    retq # sched: [7:1.00]
6265  %vaddr = bitcast i8* %addr to <16 x float>*
6266  store <16 x float>%data, <16 x float>* %vaddr, align 64
6267  ret void
6268}
6269
; Loads a <16 x float> from %addr with align 64.
; The checks expect the aligned form, vmovaps, into %zmm0.
6270define <16 x float> @mov_test27(i8 * %addr) {
6271; GENERIC-LABEL: mov_test27:
6272; GENERIC:       # %bb.0:
6273; GENERIC-NEXT:    vmovaps (%rdi), %zmm0 # sched: [7:0.50]
6274; GENERIC-NEXT:    retq # sched: [1:1.00]
6275;
6276; SKX-LABEL: mov_test27:
6277; SKX:       # %bb.0:
6278; SKX-NEXT:    vmovaps (%rdi), %zmm0 # sched: [8:0.50]
6279; SKX-NEXT:    retq # sched: [7:1.00]
6280  %vaddr = bitcast i8* %addr to <16 x float>*
6281  %res = load <16 x float>, <16 x float>* %vaddr, align 64
6282  ret <16 x float>%res
6283}
6284
; Stores a <8 x double> argument to %addr with align 1.
; The checks expect the unaligned form, vmovups, followed by vzeroupper before
; the return.
6285define void @mov_test28(i8 * %addr, <8 x double> %data) {
6286; GENERIC-LABEL: mov_test28:
6287; GENERIC:       # %bb.0:
6288; GENERIC-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
6289; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
6290; GENERIC-NEXT:    retq # sched: [1:1.00]
6291;
6292; SKX-LABEL: mov_test28:
6293; SKX:       # %bb.0:
6294; SKX-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
6295; SKX-NEXT:    vzeroupper # sched: [4:1.00]
6296; SKX-NEXT:    retq # sched: [7:1.00]
6297  %vaddr = bitcast i8* %addr to <8 x double>*
6298  store <8 x double>%data, <8 x double>* %vaddr, align 1
6299  ret void
6300}
6301
; Loads a <8 x double> from %addr with align 1.
; The checks expect the unaligned form, vmovups, into %zmm0.
6302define <8 x double> @mov_test29(i8 * %addr) {
6303; GENERIC-LABEL: mov_test29:
6304; GENERIC:       # %bb.0:
6305; GENERIC-NEXT:    vmovups (%rdi), %zmm0 # sched: [7:0.50]
6306; GENERIC-NEXT:    retq # sched: [1:1.00]
6307;
6308; SKX-LABEL: mov_test29:
6309; SKX:       # %bb.0:
6310; SKX-NEXT:    vmovups (%rdi), %zmm0 # sched: [8:0.50]
6311; SKX-NEXT:    retq # sched: [7:1.00]
6312  %vaddr = bitcast i8* %addr to <8 x double>*
6313  %res = load <8 x double>, <8 x double>* %vaddr, align 1
6314  ret <8 x double>%res
6315}
6316
; Stores a <16 x float> argument to %addr with align 1.
; The checks expect the unaligned form, vmovups, followed by vzeroupper before
; the return.
6317define void @mov_test30(i8 * %addr, <16 x float> %data) {
6318; GENERIC-LABEL: mov_test30:
6319; GENERIC:       # %bb.0:
6320; GENERIC-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
6321; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
6322; GENERIC-NEXT:    retq # sched: [1:1.00]
6323;
6324; SKX-LABEL: mov_test30:
6325; SKX:       # %bb.0:
6326; SKX-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
6327; SKX-NEXT:    vzeroupper # sched: [4:1.00]
6328; SKX-NEXT:    retq # sched: [7:1.00]
6329  %vaddr = bitcast i8* %addr to <16 x float>*
6330  store <16 x float>%data, <16 x float>* %vaddr, align 1
6331  ret void
6332}
6333
; Loads a <16 x float> from %addr with align 1.
; The checks expect the unaligned form, vmovups, into %zmm0.
6334define <16 x float> @mov_test31(i8 * %addr) {
6335; GENERIC-LABEL: mov_test31:
6336; GENERIC:       # %bb.0:
6337; GENERIC-NEXT:    vmovups (%rdi), %zmm0 # sched: [7:0.50]
6338; GENERIC-NEXT:    retq # sched: [1:1.00]
6339;
6340; SKX-LABEL: mov_test31:
6341; SKX:       # %bb.0:
6342; SKX-NEXT:    vmovups (%rdi), %zmm0 # sched: [8:0.50]
6343; SKX-NEXT:    retq # sched: [7:1.00]
6344  %vaddr = bitcast i8* %addr to <16 x float>*
6345  %res = load <16 x float>, <16 x float>* %vaddr, align 1
6346  ret <16 x float>%res
6347}
6348
; Masked aligned load of <16 x i32>: a lane is active when the matching lane of
; %mask1 is non-zero. Active lanes take the value loaded from %addr (align 64);
; inactive lanes keep %old. The checks expect vptestmd to build %k1 and the
; load/select to fold into one vmovdqa32 under {%k1}.
6349define <16 x i32> @mov_test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
6350; GENERIC-LABEL: mov_test32:
6351; GENERIC:       # %bb.0:
6352; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
6353; GENERIC-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [7:0.50]
6354; GENERIC-NEXT:    retq # sched: [1:1.00]
6355;
6356; SKX-LABEL: mov_test32:
6357; SKX:       # %bb.0:
6358; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
6359; SKX-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
6360; SKX-NEXT:    retq # sched: [7:1.00]
6361  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
6362  %vaddr = bitcast i8* %addr to <16 x i32>*
6363  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
6364  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
6365  ret <16 x i32>%res
6366}
6367
; Masked unaligned load of <16 x i32>: a lane is active when the matching lane
; of %mask1 is non-zero. Active lanes take the value loaded from %addr
; (align 1); inactive lanes keep %old. The checks expect vptestmd to build %k1
; and the load/select to fold into one vmovdqu32 under {%k1}.
6368define <16 x i32> @mov_test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
6369; GENERIC-LABEL: mov_test33:
6370; GENERIC:       # %bb.0:
6371; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
6372; GENERIC-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [7:0.50]
6373; GENERIC-NEXT:    retq # sched: [1:1.00]
6374;
6375; SKX-LABEL: mov_test33:
6376; SKX:       # %bb.0:
6377; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
6378; SKX-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
6379; SKX-NEXT:    retq # sched: [7:1.00]
6380  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
6381  %vaddr = bitcast i8* %addr to <16 x i32>*
6382  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
6383  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
6384  ret <16 x i32>%res
6385}
6386
; Zeroing-masked aligned load of <16 x i32>: a lane is active when the matching
; lane of %mask1 is non-zero. Active lanes take the value loaded from %addr
; (align 64); inactive lanes become zero. The checks expect vptestmd to build
; %k1 and the load/select to fold into one vmovdqa32 under {%k1} {z}.
6387define <16 x i32> @mov_test34(i8 * %addr, <16 x i32> %mask1) {
6388; GENERIC-LABEL: mov_test34:
6389; GENERIC:       # %bb.0:
6390; GENERIC-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
6391; GENERIC-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
6392; GENERIC-NEXT:    retq # sched: [1:1.00]
6393;
6394; SKX-LABEL: mov_test34:
6395; SKX:       # %bb.0:
6396; SKX-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
6397; SKX-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
6398; SKX-NEXT:    retq # sched: [7:1.00]
6399  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
6400  %vaddr = bitcast i8* %addr to <16 x i32>*
6401  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
6402  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
6403  ret <16 x i32>%res
6404}
6405
; Zeroing-masked unaligned load of <16 x i32>: a lane is active when the
; matching lane of %mask1 is non-zero. Active lanes take the value loaded from
; %addr (align 1); inactive lanes become zero. The checks expect vptestmd to
; build %k1 and the load/select to fold into one vmovdqu32 under {%k1} {z}.
6406define <16 x i32> @mov_test35(i8 * %addr, <16 x i32> %mask1) {
6407; GENERIC-LABEL: mov_test35:
6408; GENERIC:       # %bb.0:
6409; GENERIC-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
6410; GENERIC-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
6411; GENERIC-NEXT:    retq # sched: [1:1.00]
6412;
6413; SKX-LABEL: mov_test35:
6414; SKX:       # %bb.0:
6415; SKX-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
6416; SKX-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
6417; SKX-NEXT:    retq # sched: [7:1.00]
6418  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
6419  %vaddr = bitcast i8* %addr to <16 x i32>*
6420  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
6421  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
6422  ret <16 x i32>%res
6423}
6424
; Masked aligned load of <8 x i64>: a lane is active when the matching lane of
; %mask1 is non-zero. Active lanes take the value loaded from %addr (align 64);
; inactive lanes keep %old. The checks expect vptestmq to build %k1 and the
; load/select to fold into one vmovdqa64 under {%k1}.
6425define <8 x i64> @mov_test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
6426; GENERIC-LABEL: mov_test36:
6427; GENERIC:       # %bb.0:
6428; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
6429; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [7:0.50]
6430; GENERIC-NEXT:    retq # sched: [1:1.00]
6431;
6432; SKX-LABEL: mov_test36:
6433; SKX:       # %bb.0:
6434; SKX-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
6435; SKX-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
6436; SKX-NEXT:    retq # sched: [7:1.00]
6437  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
6438  %vaddr = bitcast i8* %addr to <8 x i64>*
6439  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
6440  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
6441  ret <8 x i64>%res
6442}
6443
; Masked unaligned load of <8 x i64>: a lane is active when the matching lane
; of %mask1 is non-zero. Active lanes take the value loaded from %addr
; (align 1); inactive lanes keep %old. The checks expect vptestmq to build %k1
; and the load/select to fold into one vmovdqu64 under {%k1}.
6444define <8 x i64> @mov_test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
6445; GENERIC-LABEL: mov_test37:
6446; GENERIC:       # %bb.0:
6447; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
6448; GENERIC-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [7:0.50]
6449; GENERIC-NEXT:    retq # sched: [1:1.00]
6450;
6451; SKX-LABEL: mov_test37:
6452; SKX:       # %bb.0:
6453; SKX-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
6454; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
6455; SKX-NEXT:    retq # sched: [7:1.00]
6456  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
6457  %vaddr = bitcast i8* %addr to <8 x i64>*
6458  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
6459  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
6460  ret <8 x i64>%res
6461}
6462
; Zero-masked aligned load: lanes of the <8 x i64> at %addr (align 64) are
; taken where %mask1 is non-zero; all other lanes become zero.
; Expected asm is vmovdqa64 under {%k1} {z}.
define <8 x i64> @mov_test38(i8 * %addr, <8 x i64> %mask1) {
; GENERIC-LABEL: mov_test38:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mov_test38:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}
6481
; Zero-masked unaligned load of <8 x i64> (align 1): unselected lanes become
; zero. Expected asm is vmovdqu64 under {%k1} {z}.
define <8 x i64> @mov_test39(i8 * %addr, <8 x i64> %mask1) {
; GENERIC-LABEL: mov_test39:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mov_test39:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}
6500
; Merge-masked aligned load of <16 x float> (align 64). The mask is an ordered
; not-equal-to-zero compare of %mask1; unselected lanes keep %old.
; Expected asm is vmovaps under {%k1}.
define <16 x float> @mov_test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
; GENERIC-LABEL: mov_test40:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
; GENERIC-NEXT:    vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT:    vmovaps (%rdi), %zmm0 {%k1} # sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mov_test40:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT:    vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vmovaps (%rdi), %zmm0 {%k1} # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
  ret <16 x float> %res
}
6521
; Merge-masked unaligned load of <16 x float> (align 1). The mask is an ordered
; not-equal-to-zero compare of %mask1; unselected lanes keep %old.
; Expected asm is vmovups under {%k1}.
define <16 x float> @mov_test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
; GENERIC-LABEL: mov_test41:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
; GENERIC-NEXT:    vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT:    vmovups (%rdi), %zmm0 {%k1} # sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mov_test41:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT:    vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vmovups (%rdi), %zmm0 {%k1} # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
  ret <16 x float> %res
}
6542
; Zero-masked aligned load of <16 x float> (align 64). The mask is an ordered
; not-equal-to-zero compare of %mask1; unselected lanes become zero.
; Expected asm is vmovaps under {%k1} {z}.
define <16 x float> @mov_test42(i8 * %addr, <16 x float> %mask1) {
; GENERIC-LABEL: mov_test42:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT:    vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT:    vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mov_test42:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT:    vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
  ret <16 x float> %res
}
6563
; Zero-masked unaligned load of <16 x float> (align 1). The mask is an ordered
; not-equal-to-zero compare of %mask1; unselected lanes become zero.
; Expected asm is vmovups under {%k1} {z}.
define <16 x float> @mov_test43(i8 * %addr, <16 x float> %mask1) {
; GENERIC-LABEL: mov_test43:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT:    vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT:    vmovups (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mov_test43:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT:    vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vmovups (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
  ret <16 x float> %res
}
6584
; Merge-masked aligned load of <8 x double> (align 64). The mask is an ordered
; not-equal-to-zero compare of %mask1; unselected lanes keep %old.
; Expected asm is vmovapd under {%k1}.
define <8 x double> @mov_test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
; GENERIC-LABEL: mov_test44:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
; GENERIC-NEXT:    vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT:    vmovapd (%rdi), %zmm0 {%k1} # sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mov_test44:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT:    vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vmovapd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
  ret <8 x double> %res
}
6605
; Merge-masked unaligned load of <8 x double> (align 1). The mask is an ordered
; not-equal-to-zero compare of %mask1; unselected lanes keep %old.
; Expected asm is vmovupd under {%k1}.
define <8 x double> @mov_test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
; GENERIC-LABEL: mov_test45:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
; GENERIC-NEXT:    vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT:    vmovupd (%rdi), %zmm0 {%k1} # sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mov_test45:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT:    vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT:    vmovupd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
  ret <8 x double> %res
}
6626
; Zero-masked aligned load of <8 x double> (align 64). The mask is an ordered
; not-equal-to-zero compare of %mask1; unselected lanes become zero.
; Expected asm is vmovapd under {%k1} {z}.
define <8 x double> @mov_test46(i8 * %addr, <8 x double> %mask1) {
; GENERIC-LABEL: mov_test46:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT:    vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT:    vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mov_test46:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT:    vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
  ret <8 x double> %res
}
6647
; Zero-masked unaligned load of <8 x double> (align 1). The mask is an ordered
; not-equal-to-zero compare of %mask1; unselected lanes become zero.
; Expected asm is vmovupd under {%k1} {z}.
define <8 x double> @mov_test47(i8 * %addr, <8 x double> %mask1) {
; GENERIC-LABEL: mov_test47:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT:    vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT:    vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mov_test47:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT:    vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
  ret <8 x double> %res
}
6668
; Bitwise NOT of an i16 expressed as xor of <16 x i1> with all-ones.
; The value arrives and leaves in a GPR, so the expected asm is a plain notl
; with no mask-register traffic.
define i16 @mask16(i16 %x) {
; GENERIC-LABEL: mask16:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    notl %edi # sched: [1:0.33]
; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mask16:
; SKX:       # %bb.0:
; SKX-NEXT:    notl %edi # sched: [1:0.25]
; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
; SKX-NEXT:    retq # sched: [7:1.00]
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  ret i16 %ret
}
6686
; Bitwise NOT of an i16 via <16 x i1> xor, then zero-extended to i32.
; Expected asm is notl followed by movzwl to clear the upper 16 bits.
define i32 @mask16_zext(i16 %x) {
; GENERIC-LABEL: mask16_zext:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    notl %edi # sched: [1:0.33]
; GENERIC-NEXT:    movzwl %di, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mask16_zext:
; SKX:       # %bb.0:
; SKX-NEXT:    notl %edi # sched: [1:0.25]
; SKX-NEXT:    movzwl %di, %eax # sched: [1:0.25]
; SKX-NEXT:    retq # sched: [7:1.00]
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %m2 = bitcast <16 x i1> %m1 to i16
  %ret = zext i16 %m2 to i32
  ret i32 %ret
}
6705
; Bitwise NOT of an i8 expressed as xor of <8 x i1> with all-ones.
; Expected asm is a scalar notb on the incoming register.
define i8 @mask8(i8 %x) {
; GENERIC-LABEL: mask8:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    notb %dil # sched: [1:0.33]
; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mask8:
; SKX:       # %bb.0:
; SKX-NEXT:    notb %dil # sched: [1:0.25]
; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
; SKX-NEXT:    retq # sched: [7:1.00]
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  ret i8 %ret
}
6723
; Bitwise NOT of an i8 via <8 x i1> xor, then zero-extended to i32.
; Expected asm is notb followed by movzbl to clear the upper 24 bits.
define i32 @mask8_zext(i8 %x) {
; GENERIC-LABEL: mask8_zext:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    notb %dil # sched: [1:0.33]
; GENERIC-NEXT:    movzbl %dil, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mask8_zext:
; SKX:       # %bb.0:
; SKX-NEXT:    notb %dil # sched: [1:0.25]
; SKX-NEXT:    movzbl %dil, %eax # sched: [1:0.25]
; SKX-NEXT:    retq # sched: [7:1.00]
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %m2 = bitcast <8 x i1> %m1 to i8
  %ret = zext i8 %m2 to i32
  ret i32 %ret
}
6742
; In-place NOT of a 16-bit mask in memory: load i16 from %ptr, invert it as
; <16 x i1>, store it back. Expected asm keeps the value in a k-register
; (kmovw load, knotw, kmovw store).
define void @mask16_mem(i16* %ptr) {
; GENERIC-LABEL: mask16_mem:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    kmovw (%rdi), %k0 # sched: [5:0.50]
; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:0.33]
; GENERIC-NEXT:    kmovw %k0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mask16_mem:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovw (%rdi), %k0 # sched: [7:1.00]
; SKX-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
; SKX-NEXT:    kmovw %k0, (%rdi) # sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %x = load i16, i16* %ptr, align 4
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  store i16 %ret, i16* %ptr, align 4
  ret void
}
6764
; In-place NOT of an 8-bit mask in memory: load i8 from %ptr, invert it as
; <8 x i1>, store it back. Expected asm keeps the value in a k-register
; (kmovb load, knotb, kmovb store).
define void @mask8_mem(i8* %ptr) {
; GENERIC-LABEL: mask8_mem:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [5:0.50]
; GENERIC-NEXT:    knotb %k0, %k0 # sched: [1:0.33]
; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mask8_mem:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovb (%rdi), %k0 # sched: [7:1.00]
; SKX-NEXT:    knotb %k0, %k0 # sched: [1:1.00]
; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %x = load i8, i8* %ptr, align 4
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  store i8 %ret, i8* %ptr, align 4
  ret void
}
6786
; Computes (x & y) | (x ^ y) on two i16 values viewed as <16 x i1>.
; Both operands come from GPRs, so the expected asm uses scalar
; xorl / andl / orl rather than k-register instructions.
define i16 @mand16(i16 %x, i16 %y) {
; GENERIC-LABEL: mand16:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
; GENERIC-NEXT:    xorl %esi, %eax # sched: [1:0.33]
; GENERIC-NEXT:    andl %esi, %edi # sched: [1:0.33]
; GENERIC-NEXT:    orl %eax, %edi # sched: [1:0.33]
; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mand16:
; SKX:       # %bb.0:
; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
; SKX-NEXT:    xorl %esi, %eax # sched: [1:0.25]
; SKX-NEXT:    andl %esi, %edi # sched: [1:0.25]
; SKX-NEXT:    orl %eax, %edi # sched: [1:0.25]
; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
; SKX-NEXT:    retq # sched: [7:1.00]
  %ma = bitcast i16 %x to <16 x i1>
  %mb = bitcast i16 %y to <16 x i1>
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
}
6813
; Same (a & b) | (a ^ b) computation as the register variant, but both
; <16 x i1> operands are loaded from memory. Expected asm loads them straight
; into k-registers and uses kandw / kxorw / korw before moving the result out.
define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
; GENERIC-LABEL: mand16_mem:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    kmovw (%rdi), %k0 # sched: [5:0.50]
; GENERIC-NEXT:    kmovw (%rsi), %k1 # sched: [5:0.50]
; GENERIC-NEXT:    kandw %k1, %k0, %k2 # sched: [1:0.33]
; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:0.33]
; GENERIC-NEXT:    korw %k0, %k2, %k0 # sched: [1:0.33]
; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: mand16_mem:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovw (%rdi), %k0 # sched: [7:1.00]
; SKX-NEXT:    kmovw (%rsi), %k1 # sched: [7:1.00]
; SKX-NEXT:    kandw %k1, %k0, %k2 # sched: [1:1.00]
; SKX-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
; SKX-NEXT:    korw %k0, %k2, %k0 # sched: [1:1.00]
; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
; SKX-NEXT:    retq # sched: [7:1.00]
  %ma = load <16 x i1>, <16 x i1>* %x
  %mb = load <16 x i1>, <16 x i1>* %y
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
}
6844
; Extracts the upper eight bits (elements 8..15) of a 16-bit mask with a
; shufflevector. Expected asm implements this as kshiftrw $8 on a k-register.
define i8 @shuf_test1(i16 %v) nounwind {
; GENERIC-LABEL: shuf_test1:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
; GENERIC-NEXT:    kshiftrw $8, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: shuf_test1:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
; SKX-NEXT:    kshiftrw $8, %k0, %k0 # sched: [3:1.00]
; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
; SKX-NEXT:    # kill: def $al killed $al killed $eax
; SKX-NEXT:    retq # sched: [7:1.00]
  %v1 = bitcast i16 %v to <16 x i1>
  %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %mask1 = bitcast <8 x i1> %mask to i8
  ret i8 %mask1
}
6866
; Extracts element 5 of an unsigned-greater-than compare on <16 x i32> and
; zero-extends the single bit to i32. Expected asm shifts the mask right by 5
; and isolates bit 0 with andl $1.
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
; GENERIC-LABEL: zext_test1:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50]
; GENERIC-NEXT:    kshiftrw $5, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT:    andl $1, %eax # sched: [1:0.33]
; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_test1:
; SKX:       # %bb.0:
; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
; SKX-NEXT:    kshiftrw $5, %k0, %k0 # sched: [3:1.00]
; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
; SKX-NEXT:    andl $1, %eax # sched: [1:0.25]
; SKX-NEXT:    vzeroupper # sched: [4:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i32
  ret i32 %res
}
6890
; Extracts element 5 of an unsigned-greater-than compare on <16 x i32> and
; zero-extends the single bit to i16. Expected asm matches the i32 variant,
; returning the low 16 bits of %eax.
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
; GENERIC-LABEL: zext_test2:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50]
; GENERIC-NEXT:    kshiftrw $5, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT:    andl $1, %eax # sched: [1:0.33]
; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_test2:
; SKX:       # %bb.0:
; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
; SKX-NEXT:    kshiftrw $5, %k0, %k0 # sched: [3:1.00]
; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
; SKX-NEXT:    andl $1, %eax # sched: [1:0.25]
; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
; SKX-NEXT:    vzeroupper # sched: [4:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i16
  ret i16 %res
}
6916
; Extracts element 5 of an unsigned-greater-than compare on <16 x i32> and
; zero-extends the single bit to i8. Expected asm isolates the bit with a
; byte-sized andb $1 instead of andl.
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
; GENERIC-LABEL: zext_test3:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50]
; GENERIC-NEXT:    kshiftrw $5, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT:    andb $1, %al # sched: [1:0.33]
; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: zext_test3:
; SKX:       # %bb.0:
; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
; SKX-NEXT:    kshiftrw $5, %k0, %k0 # sched: [3:1.00]
; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
; SKX-NEXT:    andb $1, %al # sched: [1:0.25]
; SKX-NEXT:    # kill: def $al killed $al killed $eax
; SKX-NEXT:    vzeroupper # sched: [4:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i8
  ret i8 %res
}
6942
; Stores an all-ones <8 x i1> through %R, then round-trips a constant
; <8 x i1> (element 0 clear, elements 1..7 set) through a stack slot and
; returns it as i8. Expected asm folds everything to immediates: $-1 for the
; store to %R and $-2 for the stack store and the return value.
define i8 @conv1(<8 x i1>* %R) {
; GENERIC-LABEL: conv1:
; GENERIC:       # %bb.0: # %entry
; GENERIC-NEXT:    movb $-1, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT:    movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; GENERIC-NEXT:    movb $-2, %al # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: conv1:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    movb $-1, (%rdi) # sched: [1:1.00]
; SKX-NEXT:    movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SKX-NEXT:    movb $-2, %al # sched: [1:0.25]
; SKX-NEXT:    retq # sched: [7:1.00]
entry:
  store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R

  %maskPtr = alloca <8 x i1>
  store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
  %mask = load <8 x i1>, <8 x i1>* %maskPtr
  %mask_convert = bitcast <8 x i1> %mask to i8
  ret i8 %mask_convert
}
6966
; Signed compare of two <4 x i1> compare results. On i1, "a sgt b" holds only
; when a is false and b is true, so the result is (x <= y) & (x1 > y1).
; Expected asm is vpcmpleq feeding a masked vpcmpgtq, then sign-extension of
; the mask with vpmovm2d.
define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
; GENERIC-LABEL: test4:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpcmpleq %ymm1, %ymm0, %k1 # sched: [1:0.50]
; GENERIC-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [1:0.50]
; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: test4:
; SKX:       # %bb.0:
; SKX-NEXT:    vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00]
; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT:    vzeroupper # sched: [4:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %x_gt_y = icmp sgt <4 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
  %res = icmp sgt <4 x i1> %x_gt_y, %x1_gt_y1
  %resse = sext <4 x i1> %res to <4 x i32>
  ret <4 x i32> %resse
}
6989
; Signed compare of two <2 x i1> compare results. On i1, "a slt b" holds only
; when a is true and b is false, so the result is (x < y) & (x1 <= y1).
; Expected asm is vpcmpleq feeding a masked vpcmpgtq, then sign-extension of
; the mask with vpmovm2q.
define <2 x i64> @vcmp_test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
; GENERIC-LABEL: vcmp_test5:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vpcmpleq %xmm3, %xmm2, %k1 # sched: [1:0.50]
; GENERIC-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [1:0.50]
; GENERIC-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vcmp_test5:
; SKX:       # %bb.0:
; SKX-NEXT:    vpcmpleq %xmm3, %xmm2, %k1 # sched: [3:1.00]
; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [3:1.00]
; SKX-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT:    retq # sched: [7:1.00]
  %x_gt_y = icmp slt <2 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
  %res = icmp slt <2 x i1> %x_gt_y, %x1_gt_y1
  %resse = sext <2 x i1> %res to <2 x i64>
  ret <2 x i64> %resse
}

; Masks a <16 x i1> argument with an alternating true/false pattern, compares
; the resulting i16 against zero and branches; both successors only return.
; NOTE(review): this function has no autogenerated assertions. Its "define"
; used to share a line with the closing brace of the previous function, which
; presumably hid it from utils/update_llc_test_checks.py. Re-run that script
; to generate the expected asm for both run lines.
define void @vcmp_test6(<16 x i1> %mask)  {
allocas:
  %a = and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <16 x i1> %a to i16
  %c = icmp eq i16 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}
; ORs a <8 x i1> argument with an alternating true/false pattern, compares the
; resulting i8 against zero and branches; both successors only return.
; Expected asm has no branch: it moves the mask to a GPR, applies orb $85 and
; returns.
define void @vcmp_test7(<8 x i1> %mask)  {
; GENERIC-LABEL: vcmp_test7:
; GENERIC:       # %bb.0: # %allocas
; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT:    orb $85, %al # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vcmp_test7:
; SKX:       # %bb.0: # %allocas
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:1.00]
; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
; SKX-NEXT:    orb $85, %al # sched: [1:0.25]
; SKX-NEXT:    retq # sched: [7:1.00]
allocas:
  %a = or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <8 x i1> %a to i8
  %c = icmp eq i8 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}
; Selects between two <16 x i1> compare results on a scalar condition
; (%a1 > %b1, signed) and sign-extends the chosen mask to <16 x i8>.
; The second compare is "unsigned less than zero"; the expected asm
; materializes that side with kxorw (an all-zero mask) and lowers the select
; to a branch.
define <16 x i8> @vcmp_test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; GENERIC-LABEL: vcmp_test8:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    cmpl %esi, %edi # sched: [1:0.33]
; GENERIC-NEXT:    jg .LBB386_1 # sched: [1:1.00]
; GENERIC-NEXT:  # %bb.2:
; GENERIC-NEXT:    kxorw %k0, %k0, %k0 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
; GENERIC-NEXT:  .LBB386_1:
; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 # sched: [1:0.50]
; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vcmp_test8:
; SKX:       # %bb.0:
; SKX-NEXT:    cmpl %esi, %edi # sched: [1:0.25]
; SKX-NEXT:    jg .LBB386_1 # sched: [1:0.50]
; SKX-NEXT:  # %bb.2:
; SKX-NEXT:    kxorw %k0, %k0, %k0 # sched: [1:1.00]
; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT:    vzeroupper # sched: [4:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
; SKX-NEXT:  .LBB386_1:
; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00]
; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT:    vzeroupper # sched: [4:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
  %cond = icmp sgt i32 %a1, %b1
  %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
  %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
  %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
  %res = sext <16 x i1> %mix to <16 x i8>
  ret <16 x i8> %res
}
; Selects one of two <16 x i1> arguments on a scalar condition
; (%a1 > %b1, signed). Expected asm branches on the compare, shifts the chosen
; byte vector left by 7 and round-trips it through a k-register
; (vpmovb2m / vpmovm2b).
define <16 x i1> @vpmov_test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
; GENERIC-LABEL: vpmov_test9:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    cmpl %esi, %edi # sched: [1:0.33]
; GENERIC-NEXT:    jg .LBB387_1 # sched: [1:1.00]
; GENERIC-NEXT:  # %bb.2:
; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    jmp .LBB387_3 # sched: [1:1.00]
; GENERIC-NEXT:  .LBB387_1:
; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:  .LBB387_3:
; GENERIC-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vpmov_test9:
; SKX:       # %bb.0:
; SKX-NEXT:    cmpl %esi, %edi # sched: [1:0.25]
; SKX-NEXT:    jg .LBB387_1 # sched: [1:0.50]
; SKX-NEXT:  # %bb.2:
; SKX-NEXT:    vpsllw $7, %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    jmp .LBB387_3 # sched: [1:0.50]
; SKX-NEXT:  .LBB387_1:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:  .LBB387_3:
; SKX-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:1.00]
; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %c
}

; Selects one of two <8 x i1> arguments on a scalar condition
; (%a1 > %b1, signed).
; NOTE(review): this function has no autogenerated assertions. Its "define"
; used to share a line with the closing brace of the previous function, which
; presumably hid it from utils/update_llc_test_checks.py. Re-run that script
; to generate the expected asm for both run lines.
define <8 x i1> @vpmov_test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <8 x i1> %a, <8 x i1> %b
  ret <8 x i1> %c
}
7125
; Selects one of two <4 x i1> arguments on a scalar condition
; (%a1 > %b1, signed). Expected asm branches on the compare, shifts the chosen
; dword vector left by 31 and round-trips it through a k-register
; (vpmovd2m / vpmovm2d).
define <4 x i1> @vmov_test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
; GENERIC-LABEL: vmov_test11:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    cmpl %esi, %edi # sched: [1:0.33]
; GENERIC-NEXT:    jg .LBB389_1 # sched: [1:1.00]
; GENERIC-NEXT:  # %bb.2:
; GENERIC-NEXT:    vpslld $31, %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    jmp .LBB389_3 # sched: [1:1.00]
; GENERIC-NEXT:  .LBB389_1:
; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:  .LBB389_3:
; GENERIC-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vmov_test11:
; SKX:       # %bb.0:
; SKX-NEXT:    cmpl %esi, %edi # sched: [1:0.25]
; SKX-NEXT:    jg .LBB389_1 # sched: [1:0.50]
; SKX-NEXT:  # %bb.2:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    jmp .LBB389_3 # sched: [1:0.50]
; SKX-NEXT:  .LBB389_1:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:  .LBB389_3:
; SKX-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:1.00]
; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT:    retq # sched: [7:1.00]
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <4 x i1> %a, <4 x i1> %b
  ret <4 x i1> %c
}
7158
; Selects between %x and %y using element 0 of the constant 21845 (0x5555)
; viewed as <16 x i1>. That bit is set, so the expected asm simply returns %x
; (movl %edi, %eax).
define i32 @vmov_test12(i32 %x, i32 %y)  {
; GENERIC-LABEL: vmov_test12:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vmov_test12:
; SKX:       # %bb.0:
; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 0
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}
7174
; Selects between %x and %y using element 3 of the constant 21845 (0x5555)
; viewed as <16 x i1>. That bit is clear, so the expected asm simply returns
; %y (movl %esi, %eax).
define i32 @vmov_test13(i32 %x, i32 %y)  {
; GENERIC-LABEL: vmov_test13:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    movl %esi, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vmov_test13:
; SKX:       # %bb.0:
; SKX-NEXT:    movl %esi, %eax # sched: [1:0.25]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 3
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; Inserts element 2 of the constant 21845 (0x5555) viewed as <16 x i1> into
; index 1 of the constant <4 x i1> <true, false, false, true>.
; NOTE(review): this function has no autogenerated assertions. Its "define"
; used to share a line with the closing brace of the previous function, which
; presumably hid it from utils/update_llc_test_checks.py. Re-run that script
; to generate the expected asm for both run lines.
define <4 x i1> @vmov_test14()  {
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 2
  %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
  ret <4 x i1> %c
}
7195
; Selects between two constant <16 x i1> masks (bit patterns 21845 and 1) on
; a scalar condition (%x > %y, signed). Expected asm picks the immediate with
; cmovgl in a GPR, moves it to a k-register and expands it with vpmovm2b.
define <16 x i1> @vmov_test15(i32 %x, i32 %y)  {
; GENERIC-LABEL: vmov_test15:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    cmpl %esi, %edi # sched: [1:0.33]
; GENERIC-NEXT:    movl $21845, %eax # imm = 0x5555
; GENERIC-NEXT:    # sched: [1:0.33]
; GENERIC-NEXT:    movl $1, %ecx # sched: [1:0.33]
; GENERIC-NEXT:    cmovgl %eax, %ecx # sched: [2:0.67]
; GENERIC-NEXT:    kmovd %ecx, %k0 # sched: [1:0.33]
; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SKX-LABEL: vmov_test15:
; SKX:       # %bb.0:
; SKX-NEXT:    cmpl %esi, %edi # sched: [1:0.25]
; SKX-NEXT:    movl $21845, %eax # imm = 0x5555
; SKX-NEXT:    # sched: [1:0.25]
; SKX-NEXT:    movl $1, %ecx # sched: [1:0.25]
; SKX-NEXT:    cmovgl %eax, %ecx # sched: [1:0.50]
; SKX-NEXT:    kmovd %ecx, %k0 # sched: [1:1.00]
; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT:    retq # sched: [7:1.00]
  %a = bitcast i16 21845 to <16 x i1>
  %b = bitcast i16 1 to <16 x i1>
  %mask = icmp sgt i32 %x, %y
  %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %c
}
7224
; vmov_test16: reinterprets %x as <64 x i1>, forces element 5 to true and
; sign-extends the mask to <64 x i8>. The expected code does the single-bit
; insert in mask registers with a kshift/kxor sequence, then vpmovm2b
; materialises the byte vector in %zmm0.
7225define <64 x i8> @vmov_test16(i64 %x) {
7226;
7227; GENERIC-LABEL: vmov_test16:
7228; GENERIC:       # %bb.0:
7229; GENERIC-NEXT:    kmovq %rdi, %k0 # sched: [1:0.33]
7230; GENERIC-NEXT:    movb $1, %al # sched: [1:0.33]
7231; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
7232; GENERIC-NEXT:    kshiftrq $5, %k0, %k2 # sched: [1:1.00]
7233; GENERIC-NEXT:    kxorq %k1, %k2, %k1 # sched: [1:0.33]
7234; GENERIC-NEXT:    kshiftlq $63, %k1, %k1 # sched: [1:1.00]
7235; GENERIC-NEXT:    kshiftrq $58, %k1, %k1 # sched: [1:1.00]
7236; GENERIC-NEXT:    kxorq %k1, %k0, %k0 # sched: [1:0.33]
7237; GENERIC-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.33]
7238; GENERIC-NEXT:    retq # sched: [1:1.00]
7239;
7240; SKX-LABEL: vmov_test16:
7241; SKX:       # %bb.0:
7242; SKX-NEXT:    kmovq %rdi, %k0 # sched: [1:1.00]
7243; SKX-NEXT:    movb $1, %al # sched: [1:0.25]
7244; SKX-NEXT:    kmovd %eax, %k1 # sched: [1:1.00]
7245; SKX-NEXT:    kshiftrq $5, %k0, %k2 # sched: [3:1.00]
7246; SKX-NEXT:    kxorq %k1, %k2, %k1 # sched: [1:1.00]
7247; SKX-NEXT:    kshiftlq $63, %k1, %k1 # sched: [3:1.00]
7248; SKX-NEXT:    kshiftrq $58, %k1, %k1 # sched: [3:1.00]
7249; SKX-NEXT:    kxorq %k1, %k0, %k0 # sched: [1:1.00]
7250; SKX-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.25]
7251; SKX-NEXT:    retq # sched: [7:1.00]
7252  %a = bitcast i64 %x to <64 x i1>
7253  %b = insertelement <64 x i1>%a, i1 true, i32 5
7254  %c = sext <64 x i1>%b to <64 x i8>
7255  ret <64 x i8>%c
7256}
7257
; vmov_test17: reinterprets %x as <64 x i1>, replaces element 5 with the
; run-time result of the signed compare %y > %z and sign-extends the mask to
; <64 x i8>. The expected code computes the bit with cmp/setg and inserts it
; in mask registers with a kshift/kxor sequence before vpmovm2b.
7258define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) {
7259;
7260; GENERIC-LABEL: vmov_test17:
7261; GENERIC:       # %bb.0:
7262; GENERIC-NEXT:    kmovq %rdi, %k0 # sched: [1:0.33]
7263; GENERIC-NEXT:    cmpl %edx, %esi # sched: [1:0.33]
7264; GENERIC-NEXT:    setg %al # sched: [1:0.50]
7265; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
7266; GENERIC-NEXT:    kshiftrq $5, %k0, %k2 # sched: [1:1.00]
7267; GENERIC-NEXT:    kxorq %k1, %k2, %k1 # sched: [1:0.33]
7268; GENERIC-NEXT:    kshiftlq $63, %k1, %k1 # sched: [1:1.00]
7269; GENERIC-NEXT:    kshiftrq $58, %k1, %k1 # sched: [1:1.00]
7270; GENERIC-NEXT:    kxorq %k1, %k0, %k0 # sched: [1:0.33]
7271; GENERIC-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.33]
7272; GENERIC-NEXT:    retq # sched: [1:1.00]
7273;
7274; SKX-LABEL: vmov_test17:
7275; SKX:       # %bb.0:
7276; SKX-NEXT:    kmovq %rdi, %k0 # sched: [1:1.00]
7277; SKX-NEXT:    cmpl %edx, %esi # sched: [1:0.25]
7278; SKX-NEXT:    setg %al # sched: [1:0.50]
7279; SKX-NEXT:    kmovd %eax, %k1 # sched: [1:1.00]
7280; SKX-NEXT:    kshiftrq $5, %k0, %k2 # sched: [3:1.00]
7281; SKX-NEXT:    kxorq %k1, %k2, %k1 # sched: [1:1.00]
7282; SKX-NEXT:    kshiftlq $63, %k1, %k1 # sched: [3:1.00]
7283; SKX-NEXT:    kshiftrq $58, %k1, %k1 # sched: [3:1.00]
7284; SKX-NEXT:    kxorq %k1, %k0, %k0 # sched: [1:1.00]
7285; SKX-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.25]
7286; SKX-NEXT:    retq # sched: [7:1.00]
7287  %a = bitcast i64 %x to <64 x i1>
7288  %b = icmp sgt i32 %y, %z
7289  %c = insertelement <64 x i1>%a, i1 %b, i32 5
7290  %d = sext <64 x i1>%c to <64 x i8>
7291  ret <64 x i8>%d
7292}
7293
; vmov_test18: views %a as <8 x i1> and %y as <16 x i1>, then overwrites
; element 7 of the former with element 8 of the latter and element 6 with
; element 9. The expected code performs both inserts in mask registers and
; finishes with vpmovm2w into %xmm0.
7294define <8 x i1> @vmov_test18(i8 %a, i16 %y) {
7295; GENERIC-LABEL: vmov_test18:
7296; GENERIC:       # %bb.0:
7297; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
7298; GENERIC-NEXT:    kmovd %esi, %k2 # sched: [1:0.33]
7299; GENERIC-NEXT:    kshiftrw $8, %k2, %k0 # sched: [1:1.00]
7300; GENERIC-NEXT:    kshiftrw $9, %k2, %k2 # sched: [1:1.00]
7301; GENERIC-NEXT:    kshiftrb $6, %k1, %k3 # sched: [1:1.00]
7302; GENERIC-NEXT:    kxorb %k2, %k3, %k2 # sched: [1:0.33]
7303; GENERIC-NEXT:    kshiftlb $7, %k2, %k2 # sched: [1:1.00]
7304; GENERIC-NEXT:    kshiftrb $1, %k2, %k2 # sched: [1:1.00]
7305; GENERIC-NEXT:    kxorb %k2, %k1, %k1 # sched: [1:0.33]
7306; GENERIC-NEXT:    kshiftlb $1, %k1, %k1 # sched: [1:1.00]
7307; GENERIC-NEXT:    kshiftrb $1, %k1, %k1 # sched: [1:1.00]
7308; GENERIC-NEXT:    kshiftlb $7, %k0, %k0 # sched: [1:1.00]
7309; GENERIC-NEXT:    korb %k0, %k1, %k0 # sched: [1:0.33]
7310; GENERIC-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.33]
7311; GENERIC-NEXT:    retq # sched: [1:1.00]
7312;
7313; SKX-LABEL: vmov_test18:
7314; SKX:       # %bb.0:
7315; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
7316; SKX-NEXT:    kmovd %esi, %k2 # sched: [1:1.00]
7317; SKX-NEXT:    kshiftrw $8, %k2, %k0 # sched: [3:1.00]
7318; SKX-NEXT:    kshiftrw $9, %k2, %k2 # sched: [3:1.00]
7319; SKX-NEXT:    kshiftrb $6, %k1, %k3 # sched: [3:1.00]
7320; SKX-NEXT:    kxorb %k2, %k3, %k2 # sched: [1:1.00]
7321; SKX-NEXT:    kshiftlb $7, %k2, %k2 # sched: [3:1.00]
7322; SKX-NEXT:    kshiftrb $1, %k2, %k2 # sched: [3:1.00]
7323; SKX-NEXT:    kxorb %k2, %k1, %k1 # sched: [1:1.00]
7324; SKX-NEXT:    kshiftlb $1, %k1, %k1 # sched: [3:1.00]
7325; SKX-NEXT:    kshiftrb $1, %k1, %k1 # sched: [3:1.00]
7326; SKX-NEXT:    kshiftlb $7, %k0, %k0 # sched: [3:1.00]
7327; SKX-NEXT:    korb %k0, %k1, %k0 # sched: [1:1.00]
7328; SKX-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.25]
7329; SKX-NEXT:    retq # sched: [7:1.00]
7330  %b = bitcast i8 %a to <8 x i1>
7331  %b1 = bitcast i16 %y to <16 x i1>
7332  %el1 = extractelement <16 x i1>%b1, i32 8
7333  %el2 = extractelement <16 x i1>%b1, i32 9
7334  %c = insertelement <8 x i1>%b, i1 %el1, i32 7
7335  %d = insertelement <8 x i1>%c, i1 %el2, i32 6
7336  ret <8 x i1>%d
7337}
; vmov_test21: keeps the lanes of %x whose %mask element is set and zeroes the
; others. The expected code turns the <32 x i1> argument into a mask register
; (vpsllw $7 + vpmovb2m) and applies it with a zero-masking vmovdqu16.
7338define <32 x i16> @vmov_test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
7339; GENERIC-LABEL: vmov_test21:
7340; GENERIC:       # %bb.0:
7341; GENERIC-NEXT:    vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
7342; GENERIC-NEXT:    vpmovb2m %ymm1, %k1 # sched: [1:0.33]
7343; GENERIC-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
7344; GENERIC-NEXT:    retq # sched: [1:1.00]
7345;
7346; SKX-LABEL: vmov_test21:
7347; SKX:       # %bb.0:
7348; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
7349; SKX-NEXT:    vpmovb2m %ymm1, %k1 # sched: [1:1.00]
7350; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
7351; SKX-NEXT:    retq # sched: [7:1.00]
7352  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
7353  ret <32 x i16> %ret
7354}
7355
; vmov_test22: stores a <4 x i1> argument to memory. The expected code moves
; each lane's low bit to the sign position (vpslld $31), converts to a mask
; register with vpmovd2m and writes it with a byte-sized kmovb.
7356define void @vmov_test22(<4 x i1> %a, <4 x i1>* %addr) {
7357; GENERIC-LABEL: vmov_test22:
7358; GENERIC:       # %bb.0:
7359; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
7360; GENERIC-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:0.33]
7361; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
7362; GENERIC-NEXT:    retq # sched: [1:1.00]
7363;
7364; SKX-LABEL: vmov_test22:
7365; SKX:       # %bb.0:
7366; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
7367; SKX-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:1.00]
7368; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
7369; SKX-NEXT:    retq # sched: [7:1.00]
7370  store <4 x i1> %a, <4 x i1>* %addr
7371  ret void
7372}
7373
; vmov_test23: stores a <2 x i1> argument to memory. The expected code moves
; each lane's low bit to the sign position (vpsllq $63), converts to a mask
; register with vpmovq2m and writes it with a byte-sized kmovb.
7374define void @vmov_test23(<2 x i1> %a, <2 x i1>* %addr) {
7375; GENERIC-LABEL: vmov_test23:
7376; GENERIC:       # %bb.0:
7377; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
7378; GENERIC-NEXT:    vpmovq2m %xmm0, %k0 # sched: [1:0.33]
7379; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
7380; GENERIC-NEXT:    retq # sched: [1:1.00]
7381;
7382; SKX-LABEL: vmov_test23:
7383; SKX:       # %bb.0:
7384; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
7385; SKX-NEXT:    vpmovq2m %xmm0, %k0 # sched: [1:1.00]
7386; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
7387; SKX-NEXT:    retq # sched: [7:1.00]
7388  store <2 x i1> %a, <2 x i1>* %addr
7389  ret void
7390}
7391
; store_v1i1: inverts a <1 x i1> argument (xor with true) and stores it through
; %ptr with align 4. The expected code receives the value in %edi, builds an
; all-ones mask with kxnorw, xors it in and writes the result with kmovb.
7392define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
7393; GENERIC-LABEL: store_v1i1:
7394; GENERIC:       # %bb.0:
7395; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
7396; GENERIC-NEXT:    kxnorw %k0, %k0, %k1 # sched: [1:0.33]
7397; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:0.33]
7398; GENERIC-NEXT:    kmovb %k0, (%rsi) # sched: [1:1.00]
7399; GENERIC-NEXT:    retq # sched: [1:1.00]
7400;
7401; SKX-LABEL: store_v1i1:
7402; SKX:       # %bb.0:
7403; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
7404; SKX-NEXT:    kxnorw %k0, %k0, %k1 # sched: [1:1.00]
7405; SKX-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
7406; SKX-NEXT:    kmovb %k0, (%rsi) # sched: [1:1.00]
7407; SKX-NEXT:    retq # sched: [7:1.00]
7408  %x = xor <1 x i1> %c, <i1 1>
7409  store <1 x i1> %x, <1 x i1>*  %ptr, align 4
7410  ret void
7411}
7412
; store_v2i1: inverts a <2 x i1> argument (xor with all-ones) and stores it
; through %ptr with align 4. The expected code converts the vector to a mask
; register (vpsllq $63 + vpmovq2m), inverts it with knotw and stores it with
; kmovb.
7413define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
7414; GENERIC-LABEL: store_v2i1:
7415; GENERIC:       # %bb.0:
7416; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
7417; GENERIC-NEXT:    vpmovq2m %xmm0, %k0 # sched: [1:0.33]
7418; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:0.33]
7419; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
7420; GENERIC-NEXT:    retq # sched: [1:1.00]
7421;
7422; SKX-LABEL: store_v2i1:
7423; SKX:       # %bb.0:
7424; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
7425; SKX-NEXT:    vpmovq2m %xmm0, %k0 # sched: [1:1.00]
7426; SKX-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
7427; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
7428; SKX-NEXT:    retq # sched: [7:1.00]
7429  %x = xor <2 x i1> %c, <i1 1, i1 1>
7430  store <2 x i1> %x, <2 x i1>*  %ptr, align 4
7431  ret void
7432}
7433
; store_v4i1: inverts a <4 x i1> argument (xor with all-ones) and stores it
; through %ptr with align 4. The expected code converts the vector to a mask
; register (vpslld $31 + vpmovd2m), inverts it with knotw and stores it with
; kmovb.
7434define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
7435; GENERIC-LABEL: store_v4i1:
7436; GENERIC:       # %bb.0:
7437; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
7438; GENERIC-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:0.33]
7439; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:0.33]
7440; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
7441; GENERIC-NEXT:    retq # sched: [1:1.00]
7442;
7443; SKX-LABEL: store_v4i1:
7444; SKX:       # %bb.0:
7445; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
7446; SKX-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:1.00]
7447; SKX-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
7448; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
7449; SKX-NEXT:    retq # sched: [7:1.00]
7450  %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
7451  store <4 x i1> %x, <4 x i1>*  %ptr, align 4
7452  ret void
7453}
7454
; store_v8i1: inverts an <8 x i1> argument (xor with all-ones) and stores it
; through %ptr with align 4. The expected code converts the vector to a mask
; register (vpsllw $15 + vpmovw2m), inverts it with the byte-wide knotb and
; stores it with kmovb.
7455define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
7456; GENERIC-LABEL: store_v8i1:
7457; GENERIC:       # %bb.0:
7458; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
7459; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
7460; GENERIC-NEXT:    knotb %k0, %k0 # sched: [1:0.33]
7461; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
7462; GENERIC-NEXT:    retq # sched: [1:1.00]
7463;
7464; SKX-LABEL: store_v8i1:
7465; SKX:       # %bb.0:
7466; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
7467; SKX-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:1.00]
7468; SKX-NEXT:    knotb %k0, %k0 # sched: [1:1.00]
7469; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
7470; SKX-NEXT:    retq # sched: [7:1.00]
7471  %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
7472  store <8 x i1> %x, <8 x i1>*  %ptr, align 4
7473  ret void
7474}
7475
; store_v16i1: inverts a <16 x i1> argument (xor with all-ones) and stores it
; through %ptr with align 4. The expected code converts the vector to a mask
; register (vpsllw $7 + vpmovb2m), inverts it with knotw and stores all 16
; bits with kmovw.
7476define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
7477; GENERIC-LABEL: store_v16i1:
7478; GENERIC:       # %bb.0:
7479; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
7480; GENERIC-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:0.33]
7481; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:0.33]
7482; GENERIC-NEXT:    kmovw %k0, (%rdi) # sched: [1:1.00]
7483; GENERIC-NEXT:    retq # sched: [1:1.00]
7484;
7485; SKX-LABEL: store_v16i1:
7486; SKX:       # %bb.0:
7487; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
7488; SKX-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:1.00]
7489; SKX-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
7490; SKX-NEXT:    kmovw %k0, (%rdi) # sched: [1:1.00]
7491; SKX-NEXT:    retq # sched: [7:1.00]
7492  %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
7493  store <16 x i1> %x, <16 x i1>*  %ptr, align 4
7494  ret void
7495}
7496
; f1: toggles an i1 global and tail-calls f2 with the new value.
; The C snippet below is presumably the source this IR was derived from
; (TODO confirm); the static int v appears here as the i1 global @f1.v.
7497;void f2(int);
7498;void f1(int c)
7499;{
7500;  static int v = 0;
7501;  if (v == 0)
7502;    v = 1;
7503;  else
7504;    v = 0;
7505;  f2(v);
7506;}
7507
; Backing storage for the toggled flag: an i1 that starts out false.
7508@f1.v = internal unnamed_addr global i1 false, align 4
7509
; The expected code handles the i1 as a byte: zero-extending load, xor with 1,
; byte store, then a jump to f2 (tail call) with the value already in %edi.
; The %c argument is not used by the body.
7510define void @f1(i32 %c) {
7511; GENERIC-LABEL: f1:
7512; GENERIC:       # %bb.0: # %entry
7513; GENERIC-NEXT:    movzbl {{.*}}(%rip), %edi # sched: [5:0.50]
7514; GENERIC-NEXT:    xorl $1, %edi # sched: [1:0.33]
7515; GENERIC-NEXT:    movb %dil, {{.*}}(%rip) # sched: [1:1.00]
7516; GENERIC-NEXT:    jmp f2 # TAILCALL
7517;
7518; SKX-LABEL: f1:
7519; SKX:       # %bb.0: # %entry
7520; SKX-NEXT:    movzbl {{.*}}(%rip), %edi # sched: [5:0.50]
7521; SKX-NEXT:    xorl $1, %edi # sched: [1:0.25]
7522; SKX-NEXT:    movb %dil, {{.*}}(%rip) # sched: [1:1.00]
7523; SKX-NEXT:    jmp f2 # TAILCALL
7524entry:
7525  %.b1 = load i1, i1* @f1.v, align 4
7526  %not..b1 = xor i1 %.b1, true
7527  store i1 %not..b1, i1* @f1.v, align 4
7528  %0 = zext i1 %not..b1 to i32
7529  tail call void @f2(i32 %0) #2
7530  ret void
7531}
7532
; External callee of f1. Attribute groups #1 and #2 are referenced in this
; block but not defined within it; they are expected elsewhere in the file.
7533declare void @f2(i32) #1
7534
; store_i16_i1: truncates an i16 argument to i1 and stores it through %y.
; The expected code masks the value to its low bit (andl $1) and writes one
; byte.
7535define void @store_i16_i1(i16 %x, i1 *%y) {
7536; GENERIC-LABEL: store_i16_i1:
7537; GENERIC:       # %bb.0:
7538; GENERIC-NEXT:    andl $1, %edi # sched: [1:0.33]
7539; GENERIC-NEXT:    movb %dil, (%rsi) # sched: [1:1.00]
7540; GENERIC-NEXT:    retq # sched: [1:1.00]
7541;
7542; SKX-LABEL: store_i16_i1:
7543; SKX:       # %bb.0:
7544; SKX-NEXT:    andl $1, %edi # sched: [1:0.25]
7545; SKX-NEXT:    movb %dil, (%rsi) # sched: [1:1.00]
7546; SKX-NEXT:    retq # sched: [7:1.00]
7547  %c = trunc i16 %x to i1
7548  store i1 %c, i1* %y
7549  ret void
7550}
7551
; store_i8_i1: truncates an i8 argument to i1 and stores it through %y.
; The expected code masks the value to its low bit (andl $1) and writes one
; byte.
7552define void @store_i8_i1(i8 %x, i1 *%y) {
7553; GENERIC-LABEL: store_i8_i1:
7554; GENERIC:       # %bb.0:
7555; GENERIC-NEXT:    andl $1, %edi # sched: [1:0.33]
7556; GENERIC-NEXT:    movb %dil, (%rsi) # sched: [1:1.00]
7557; GENERIC-NEXT:    retq # sched: [1:1.00]
7558;
7559; SKX-LABEL: store_i8_i1:
7560; SKX:       # %bb.0:
7561; SKX-NEXT:    andl $1, %edi # sched: [1:0.25]
7562; SKX-NEXT:    movb %dil, (%rsi) # sched: [1:1.00]
7563; SKX-NEXT:    retq # sched: [7:1.00]
7564  %c = trunc i8 %x to i1
7565  store i1 %c, i1* %y
7566  ret void
7567}
7568
; test_build_vec_v32i1: applies a compile-time constant <32 x i1> mask to %x,
; zeroing the unselected lanes. The expected code materialises the mask as the
; 32-bit immediate 0x59455495, moves it into %k1 and uses a zero-masking
; vmovdqu16.
7569define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
7570; GENERIC-LABEL: test_build_vec_v32i1:
7571; GENERIC:       # %bb.0:
7572; GENERIC-NEXT:    movl $1497715861, %eax # imm = 0x59455495
7573; GENERIC-NEXT:    # sched: [1:0.33]
7574; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
7575; GENERIC-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
7576; GENERIC-NEXT:    retq # sched: [1:1.00]
7577;
7578; SKX-LABEL: test_build_vec_v32i1:
7579; SKX:       # %bb.0:
7580; SKX-NEXT:    movl $1497715861, %eax # imm = 0x59455495
7581; SKX-NEXT:    # sched: [1:0.25]
7582; SKX-NEXT:    kmovd %eax, %k1 # sched: [1:1.00]
7583; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
7584; SKX-NEXT:    retq # sched: [7:1.00]
7585  %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
7586  ret <32 x i16> %ret
7587}
7588
; test_build_vec_v64i1: applies a compile-time constant <64 x i1> mask to the
; bytes of %x, zeroing the unselected lanes. Unlike the 32-lane word variant,
; the expected code uses no mask register: a single vpshufb keeps the selected
; bytes in place and zeroes the rest.
7589define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
7590; GENERIC-LABEL: test_build_vec_v64i1:
7591; GENERIC:       # %bb.0:
7592; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:0.50]
7593; GENERIC-NEXT:    retq # sched: [1:1.00]
7594;
7595; SKX-LABEL: test_build_vec_v64i1:
7596; SKX:       # %bb.0:
7597; SKX-NEXT:    vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:1.00]
7598; SKX-NEXT:    retq # sched: [7:1.00]
7599  %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
7600  ret <64 x i8> %ret
7601}
7602
; ktest_1: branch on an 8-bit mask being all-zero.
; Loads two overlapping <8 x double> vectors, at %base and at %base + 1 double
; (both with align 1), then computes
;   sel1 = %in > val1
;   val3 = sel1 ? val2 : 0
;   sel2 = %in < val3
;   sel3 = sel1 & sel2
; If sel3 has no bit set, %in is stored at the second address (L2), otherwise
; at the first (L1). The expected code folds the `and` into a masked compare
; and tests the result with kortestb followed by a conditional jump.
7603define void @ktest_1(<8 x double> %in, double * %base) {
7604; GENERIC-LABEL: ktest_1:
7605; GENERIC:       # %bb.0:
7606; GENERIC-NEXT:    vmovupd (%rdi), %zmm1 # sched: [7:0.50]
7607; GENERIC-NEXT:    vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
7608; GENERIC-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [7:0.50]
7609; GENERIC-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
7610; GENERIC-NEXT:    kortestb %k0, %k0 # sched: [1:0.33]
7611; GENERIC-NEXT:    je .LBB410_2 # sched: [1:1.00]
7612; GENERIC-NEXT:  # %bb.1: # %L1
7613; GENERIC-NEXT:    vmovapd %zmm0, (%rdi) # sched: [1:1.00]
7614; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
7615; GENERIC-NEXT:    retq # sched: [1:1.00]
7616; GENERIC-NEXT:  .LBB410_2: # %L2
7617; GENERIC-NEXT:    vmovapd %zmm0, 8(%rdi) # sched: [1:1.00]
7618; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
7619; GENERIC-NEXT:    retq # sched: [1:1.00]
7620;
7621; SKX-LABEL: ktest_1:
7622; SKX:       # %bb.0:
7623; SKX-NEXT:    vmovupd (%rdi), %zmm1 # sched: [8:0.50]
7624; SKX-NEXT:    vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
7625; SKX-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [8:0.50]
7626; SKX-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
7627; SKX-NEXT:    kortestb %k0, %k0 # sched: [3:1.00]
7628; SKX-NEXT:    je .LBB410_2 # sched: [1:0.50]
7629; SKX-NEXT:  # %bb.1: # %L1
7630; SKX-NEXT:    vmovapd %zmm0, (%rdi) # sched: [1:1.00]
7631; SKX-NEXT:    vzeroupper # sched: [4:1.00]
7632; SKX-NEXT:    retq # sched: [7:1.00]
7633; SKX-NEXT:  .LBB410_2: # %L2
7634; SKX-NEXT:    vmovapd %zmm0, 8(%rdi) # sched: [1:1.00]
7635; SKX-NEXT:    vzeroupper # sched: [4:1.00]
7636; SKX-NEXT:    retq # sched: [7:1.00]
7637  %addr1 = getelementptr double, double * %base, i64 0
7638  %addr2 = getelementptr double, double * %base, i64 1
7639
7640  %vaddr1 = bitcast double* %addr1 to <8 x double>*
7641  %vaddr2 = bitcast double* %addr2 to <8 x double>*
7642
7643  %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1
7644  %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1
7645
7646  %sel1 = fcmp ogt <8 x double>%in, %val1
7647  %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
7648  %sel2 = fcmp olt <8 x double> %in, %val3
7649  %sel3 = and <8 x i1> %sel1, %sel2
7650
7651  %int_sel3 = bitcast <8 x i1> %sel3 to i8
7652  %res = icmp eq i8 %int_sel3, zeroinitializer
7653  br i1 %res, label %L2, label %L1
7654L1:
7655  store <8 x double> %in, <8 x double>* %vaddr1
7656  br label %End
7657L2:
7658  store <8 x double> %in, <8 x double>* %vaddr2
7659  br label %End
7660End:
7661  ret void
7662}
7663
; ktest_2: branch on a 32-bit mask being all-zero.
; Loads two overlapping <32 x float> vectors, at %base and at %base + 1 float
; (both with align 1), then computes
;   sel1 = %in > val1
;   val3 = sel1 ? val2 : 0
;   sel2 = %in < val3
;   sel3 = sel1 | sel2
; If sel3 has no bit set, %in is stored at the second address (L2), otherwise
; at the first (L1). The expected code handles the 32 lanes as two zmm halves,
; joins each pair of 16-bit compare masks with kunpckwd and tests the `or` of
; the two 32-bit masks with a single kortestd.
7664define void @ktest_2(<32 x float> %in, float * %base) {
7665;
7666; GENERIC-LABEL: ktest_2:
7667; GENERIC:       # %bb.0:
7668; GENERIC-NEXT:    vmovups (%rdi), %zmm2 # sched: [7:0.50]
7669; GENERIC-NEXT:    vmovups 64(%rdi), %zmm3 # sched: [7:0.50]
7670; GENERIC-NEXT:    vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00]
7671; GENERIC-NEXT:    vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00]
7672; GENERIC-NEXT:    kunpckwd %k1, %k2, %k0 # sched: [1:1.00]
7673; GENERIC-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [7:0.50]
7674; GENERIC-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [7:0.50]
7675; GENERIC-NEXT:    vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
7676; GENERIC-NEXT:    vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
7677; GENERIC-NEXT:    kunpckwd %k1, %k2, %k1 # sched: [1:1.00]
7678; GENERIC-NEXT:    kortestd %k1, %k0 # sched: [1:0.33]
7679; GENERIC-NEXT:    je .LBB411_2 # sched: [1:1.00]
7680; GENERIC-NEXT:  # %bb.1: # %L1
7681; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
7682; GENERIC-NEXT:    vmovaps %zmm1, 64(%rdi) # sched: [1:1.00]
7683; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
7684; GENERIC-NEXT:    retq # sched: [1:1.00]
7685; GENERIC-NEXT:  .LBB411_2: # %L2
7686; GENERIC-NEXT:    vmovaps %zmm0, 4(%rdi) # sched: [1:1.00]
7687; GENERIC-NEXT:    vmovaps %zmm1, 68(%rdi) # sched: [1:1.00]
7688; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
7689; GENERIC-NEXT:    retq # sched: [1:1.00]
7690;
7691; SKX-LABEL: ktest_2:
7692; SKX:       # %bb.0:
7693; SKX-NEXT:    vmovups (%rdi), %zmm2 # sched: [8:0.50]
7694; SKX-NEXT:    vmovups 64(%rdi), %zmm3 # sched: [8:0.50]
7695; SKX-NEXT:    vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00]
7696; SKX-NEXT:    vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00]
7697; SKX-NEXT:    kunpckwd %k1, %k2, %k0 # sched: [3:1.00]
7698; SKX-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [8:0.50]
7699; SKX-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [8:0.50]
7700; SKX-NEXT:    vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
7701; SKX-NEXT:    vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
7702; SKX-NEXT:    kunpckwd %k1, %k2, %k1 # sched: [3:1.00]
7703; SKX-NEXT:    kortestd %k1, %k0 # sched: [3:1.00]
7704; SKX-NEXT:    je .LBB411_2 # sched: [1:0.50]
7705; SKX-NEXT:  # %bb.1: # %L1
7706; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
7707; SKX-NEXT:    vmovaps %zmm1, 64(%rdi) # sched: [1:1.00]
7708; SKX-NEXT:    vzeroupper # sched: [4:1.00]
7709; SKX-NEXT:    retq # sched: [7:1.00]
7710; SKX-NEXT:  .LBB411_2: # %L2
7711; SKX-NEXT:    vmovaps %zmm0, 4(%rdi) # sched: [1:1.00]
7712; SKX-NEXT:    vmovaps %zmm1, 68(%rdi) # sched: [1:1.00]
7713; SKX-NEXT:    vzeroupper # sched: [4:1.00]
7714; SKX-NEXT:    retq # sched: [7:1.00]
7715  %addr1 = getelementptr float, float * %base, i64 0
7716  %addr2 = getelementptr float, float * %base, i64 1
7717
7718  %vaddr1 = bitcast float* %addr1 to <32 x float>*
7719  %vaddr2 = bitcast float* %addr2 to <32 x float>*
7720
7721  %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1
7722  %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1
7723
7724  %sel1 = fcmp ogt <32 x float>%in, %val1
7725  %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
7726  %sel2 = fcmp olt <32 x float> %in, %val3
7727  %sel3 = or <32 x i1> %sel1, %sel2
7728
7729  %int_sel3 = bitcast <32 x i1> %sel3 to i32
7730  %res = icmp eq i32 %int_sel3, zeroinitializer
7731  br i1 %res, label %L2, label %L1
7732L1:
7733  store <32 x float> %in, <32 x float>* %vaddr1
7734  br label %End
7735L2:
7736  store <32 x float> %in, <32 x float>* %vaddr2
7737  br label %End
7738End:
7739  ret void
7740}
7741
; load_8i1: loads an <8 x i1> from memory and sign-extends it to <8 x i64>.
; The expected code loads the mask with kmovb and expands it into %zmm0 with
; vpmovm2q.
7742define <8 x i64> @load_8i1(<8 x i1>* %a) {
7743; GENERIC-LABEL: load_8i1:
7744; GENERIC:       # %bb.0:
7745; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [5:0.50]
7746; GENERIC-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.33]
7747; GENERIC-NEXT:    retq # sched: [1:1.00]
7748;
7749; SKX-LABEL: load_8i1:
7750; SKX:       # %bb.0:
7751; SKX-NEXT:    kmovb (%rdi), %k0 # sched: [7:1.00]
7752; SKX-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.25]
7753; SKX-NEXT:    retq # sched: [7:1.00]
7754  %b = load <8 x i1>, <8 x i1>* %a
7755  %c = sext <8 x i1> %b to <8 x i64>
7756  ret <8 x i64> %c
7757}
7758
; load_16i1: loads a <16 x i1> from memory and sign-extends it to <16 x i32>.
; The expected code loads the mask with kmovw and expands it into %zmm0 with
; vpmovm2d.
7759define <16 x i32> @load_16i1(<16 x i1>* %a) {
7760; GENERIC-LABEL: load_16i1:
7761; GENERIC:       # %bb.0:
7762; GENERIC-NEXT:    kmovw (%rdi), %k0 # sched: [5:0.50]
7763; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
7764; GENERIC-NEXT:    retq # sched: [1:1.00]
7765;
7766; SKX-LABEL: load_16i1:
7767; SKX:       # %bb.0:
7768; SKX-NEXT:    kmovw (%rdi), %k0 # sched: [7:1.00]
7769; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
7770; SKX-NEXT:    retq # sched: [7:1.00]
7771  %b = load <16 x i1>, <16 x i1>* %a
7772  %c = sext <16 x i1> %b to <16 x i32>
7773  ret <16 x i32> %c
7774}
7775
; load_2i1: loads a <2 x i1> from memory and sign-extends it to <2 x i16>.
; The expected code loads a whole byte with kmovb and expands the mask into
; 64-bit lanes of %xmm0 with vpmovm2q, although the IR result type is
; <2 x i16>.
7776define <2 x i16> @load_2i1(<2 x i1>* %a) {
7777; GENERIC-LABEL: load_2i1:
7778; GENERIC:       # %bb.0:
7779; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [5:0.50]
7780; GENERIC-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.33]
7781; GENERIC-NEXT:    retq # sched: [1:1.00]
7782;
7783; SKX-LABEL: load_2i1:
7784; SKX:       # %bb.0:
7785; SKX-NEXT:    kmovb (%rdi), %k0 # sched: [7:1.00]
7786; SKX-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.25]
7787; SKX-NEXT:    retq # sched: [7:1.00]
7788  %b = load <2 x i1>, <2 x i1>* %a
7789  %c = sext <2 x i1> %b to <2 x i16>
7790  ret <2 x i16> %c
7791}
7792
; load_4i1: loads a <4 x i1> from memory and sign-extends it to <4 x i16>.
; The expected code loads a whole byte with kmovb and expands the mask into
; 32-bit lanes of %xmm0 with vpmovm2d, although the IR result type is
; <4 x i16>.
7793define <4 x i16> @load_4i1(<4 x i1>* %a) {
7794; GENERIC-LABEL: load_4i1:
7795; GENERIC:       # %bb.0:
7796; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [5:0.50]
7797; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
7798; GENERIC-NEXT:    retq # sched: [1:1.00]
7799;
7800; SKX-LABEL: load_4i1:
7801; SKX:       # %bb.0:
7802; SKX-NEXT:    kmovb (%rdi), %k0 # sched: [7:1.00]
7803; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
7804; SKX-NEXT:    retq # sched: [7:1.00]
7805  %b = load <4 x i1>, <4 x i1>* %a
7806  %c = sext <4 x i1> %b to <4 x i16>
7807  ret <4 x i16> %c
7808}
7809
; load_32i1: loads a <32 x i1> from memory and sign-extends it to <32 x i16>.
; The expected code loads the mask with kmovd and expands it into %zmm0 with
; vpmovm2w.
7810define <32 x i16> @load_32i1(<32 x i1>* %a) {
7811; GENERIC-LABEL: load_32i1:
7812; GENERIC:       # %bb.0:
7813; GENERIC-NEXT:    kmovd (%rdi), %k0 # sched: [5:0.50]
7814; GENERIC-NEXT:    vpmovm2w %k0, %zmm0 # sched: [1:0.33]
7815; GENERIC-NEXT:    retq # sched: [1:1.00]
7816;
7817; SKX-LABEL: load_32i1:
7818; SKX:       # %bb.0:
7819; SKX-NEXT:    kmovd (%rdi), %k0 # sched: [7:1.00]
7820; SKX-NEXT:    vpmovm2w %k0, %zmm0 # sched: [1:0.25]
7821; SKX-NEXT:    retq # sched: [7:1.00]
7822  %b = load <32 x i1>, <32 x i1>* %a
7823  %c = sext <32 x i1> %b to <32 x i16>
7824  ret <32 x i16> %c
7825}
7826
; load_64i1: loads a <64 x i1> from memory and sign-extends it to <64 x i8>.
; The expected code loads the mask with kmovq and expands it into %zmm0 with
; vpmovm2b.
7827define <64 x i8> @load_64i1(<64 x i1>* %a) {
7828; GENERIC-LABEL: load_64i1:
7829; GENERIC:       # %bb.0:
7830; GENERIC-NEXT:    kmovq (%rdi), %k0 # sched: [5:0.50]
7831; GENERIC-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.33]
7832; GENERIC-NEXT:    retq # sched: [1:1.00]
7833;
7834; SKX-LABEL: load_64i1:
7835; SKX:       # %bb.0:
7836; SKX-NEXT:    kmovq (%rdi), %k0 # sched: [7:1.00]
7837; SKX-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.25]
7838; SKX-NEXT:    retq # sched: [7:1.00]
7839  %b = load <64 x i1>, <64 x i1>* %a
7840  %c = sext <64 x i1> %b to <64 x i8>
7841  ret <64 x i8> %c
7842}
7843
; store_8i1: stores an <8 x i1> argument through %a. The expected code moves
; each word lane's low bit to the sign position (vpsllw $15), converts to a
; mask register with vpmovw2m and writes it with kmovb.
7844define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
7845; GENERIC-LABEL: store_8i1:
7846; GENERIC:       # %bb.0:
7847; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
7848; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
7849; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
7850; GENERIC-NEXT:    retq # sched: [1:1.00]
7851;
7852; SKX-LABEL: store_8i1:
7853; SKX:       # %bb.0:
7854; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
7855; SKX-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:1.00]
7856; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
7857; SKX-NEXT:    retq # sched: [7:1.00]
7858  store <8 x i1> %v, <8 x i1>* %a
7859  ret void
7860}
7861
; store_8i1_1: truncates an <8 x i16> argument to <8 x i1> and stores it
; through %a. The expected instruction sequence is the same as for a direct
; <8 x i1> argument: vpsllw $15, vpmovw2m, kmovb.
7862define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
7863; GENERIC-LABEL: store_8i1_1:
7864; GENERIC:       # %bb.0:
7865; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
7866; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
7867; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
7868; GENERIC-NEXT:    retq # sched: [1:1.00]
7869;
7870; SKX-LABEL: store_8i1_1:
7871; SKX:       # %bb.0:
7872; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
7873; SKX-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:1.00]
7874; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
7875; SKX-NEXT:    retq # sched: [7:1.00]
7876  %v1 = trunc <8 x i16> %v to <8 x i1>
7877  store <8 x i1> %v1, <8 x i1>* %a
7878  ret void
7879}
7880
; store_16i1: stores a <16 x i1> argument through %a. The expected code shifts
; each byte lane's low bit to the sign position (vpsllw $7), converts to a
; mask register with vpmovb2m and writes 16 bits with kmovw.
7881define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
7882; GENERIC-LABEL: store_16i1:
7883; GENERIC:       # %bb.0:
7884; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
7885; GENERIC-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:0.33]
7886; GENERIC-NEXT:    kmovw %k0, (%rdi) # sched: [1:1.00]
7887; GENERIC-NEXT:    retq # sched: [1:1.00]
7888;
7889; SKX-LABEL: store_16i1:
7890; SKX:       # %bb.0:
7891; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
7892; SKX-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:1.00]
7893; SKX-NEXT:    kmovw %k0, (%rdi) # sched: [1:1.00]
7894; SKX-NEXT:    retq # sched: [7:1.00]
7895  store <16 x i1> %v, <16 x i1>* %a
7896  ret void
7897}
7898
; store_32i1: stores a <32 x i1> argument through %a. The expected code works
; on %ymm0 (vpsllw $7 + vpmovb2m), writes 32 bits with kmovd and emits
; vzeroupper before returning.
7899define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
7900; GENERIC-LABEL: store_32i1:
7901; GENERIC:       # %bb.0:
7902; GENERIC-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
7903; GENERIC-NEXT:    vpmovb2m %ymm0, %k0 # sched: [1:0.33]
7904; GENERIC-NEXT:    kmovd %k0, (%rdi) # sched: [1:1.00]
7905; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
7906; GENERIC-NEXT:    retq # sched: [1:1.00]
7907;
7908; SKX-LABEL: store_32i1:
7909; SKX:       # %bb.0:
7910; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
7911; SKX-NEXT:    vpmovb2m %ymm0, %k0 # sched: [1:1.00]
7912; SKX-NEXT:    kmovd %k0, (%rdi) # sched: [1:1.00]
7913; SKX-NEXT:    vzeroupper # sched: [4:1.00]
7914; SKX-NEXT:    retq # sched: [7:1.00]
7915  store <32 x i1> %v, <32 x i1>* %a
7916  ret void
7917}
7918
; store_32i1_1: truncates a <32 x i16> argument to <32 x i1> and stores it
; through %a. The expected code works on %zmm0 (vpsllw $15 + vpmovw2m), writes
; 32 bits with kmovd and emits vzeroupper before returning.
7919define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
7920; GENERIC-LABEL: store_32i1_1:
7921; GENERIC:       # %bb.0:
7922; GENERIC-NEXT:    vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00]
7923; GENERIC-NEXT:    vpmovw2m %zmm0, %k0 # sched: [1:0.33]
7924; GENERIC-NEXT:    kmovd %k0, (%rdi) # sched: [1:1.00]
7925; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
7926; GENERIC-NEXT:    retq # sched: [1:1.00]
7927;
7928; SKX-LABEL: store_32i1_1:
7929; SKX:       # %bb.0:
7930; SKX-NEXT:    vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00]
7931; SKX-NEXT:    vpmovw2m %zmm0, %k0 # sched: [1:1.00]
7932; SKX-NEXT:    kmovd %k0, (%rdi) # sched: [1:1.00]
7933; SKX-NEXT:    vzeroupper # sched: [4:1.00]
7934; SKX-NEXT:    retq # sched: [7:1.00]
7935  %v1 = trunc <32 x i16> %v to <32 x i1>
7936  store <32 x i1> %v1, <32 x i1>* %a
7937  ret void
7938}
7939
7940
; store_64i1: stores a <64 x i1> argument through %a. The expected code works
; on %zmm0 (vpsllw $7 + vpmovb2m), writes all 64 bits with kmovq and emits
; vzeroupper before returning.
7941define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
7942;
7943; GENERIC-LABEL: store_64i1:
7944; GENERIC:       # %bb.0:
7945; GENERIC-NEXT:    vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00]
7946; GENERIC-NEXT:    vpmovb2m %zmm0, %k0 # sched: [1:0.33]
7947; GENERIC-NEXT:    kmovq %k0, (%rdi) # sched: [1:1.00]
7948; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
7949; GENERIC-NEXT:    retq # sched: [1:1.00]
7950;
7951; SKX-LABEL: store_64i1:
7952; SKX:       # %bb.0:
7953; SKX-NEXT:    vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00]
7954; SKX-NEXT:    vpmovb2m %zmm0, %k0 # sched: [1:1.00]
7955; SKX-NEXT:    kmovq %k0, (%rdi) # sched: [1:1.00]
7956; SKX-NEXT:    vzeroupper # sched: [4:1.00]
7957; SKX-NEXT:    retq # sched: [7:1.00]
7958  store <64 x i1> %v, <64 x i1>* %a
7959  ret void
7960}
7961
; Compares a <16 x i32> vector against zero, keeps the low eight mask lanes,
; bitcasts them to i8, zero-extends to i32 and returns the value added to
; itself. The expected code is vptestnmd, a kmovb into %eax and an addl; no
; separate zero-extension instruction appears in the checked output.
7962define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
7963; GENERIC-LABEL: test_bitcast_v8i1_zext:
7964; GENERIC:       # %bb.0:
7965; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33]
7966; GENERIC-NEXT:    kmovb %k0, %eax # sched: [1:0.33]
7967; GENERIC-NEXT:    addl %eax, %eax # sched: [1:0.33]
7968; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
7969; GENERIC-NEXT:    retq # sched: [1:1.00]
7970;
7971; SKX-LABEL: test_bitcast_v8i1_zext:
7972; SKX:       # %bb.0:
7973; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00]
7974; SKX-NEXT:    kmovb %k0, %eax # sched: [3:1.00]
7975; SKX-NEXT:    addl %eax, %eax # sched: [1:0.25]
7976; SKX-NEXT:    vzeroupper # sched: [4:1.00]
7977; SKX-NEXT:    retq # sched: [7:1.00]
7978   %v1 = icmp eq <16 x i32> %a, zeroinitializer
7979   %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7980   %mask1 = bitcast <8 x i1> %mask to i8
7981   %val = zext i8 %mask1 to i32
7982   %val1 = add i32 %val, %val
7983   ret i32 %val1
7984}
7985
; Same shape as the 8-lane variant, but the whole <16 x i1> compare result is
; bitcast to i16 before the zero-extension and add. The mask is expected to be
; moved to %eax with kmovw.
7986define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
7987; GENERIC-LABEL: test_bitcast_v16i1_zext:
7988; GENERIC:       # %bb.0:
7989; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33]
7990; GENERIC-NEXT:    kmovw %k0, %eax # sched: [1:0.33]
7991; GENERIC-NEXT:    addl %eax, %eax # sched: [1:0.33]
7992; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
7993; GENERIC-NEXT:    retq # sched: [1:1.00]
7994;
7995; SKX-LABEL: test_bitcast_v16i1_zext:
7996; SKX:       # %bb.0:
7997; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00]
7998; SKX-NEXT:    kmovw %k0, %eax # sched: [3:1.00]
7999; SKX-NEXT:    addl %eax, %eax # sched: [1:0.25]
8000; SKX-NEXT:    vzeroupper # sched: [4:1.00]
8001; SKX-NEXT:    retq # sched: [7:1.00]
8002   %v1 = icmp eq <16 x i32> %a, zeroinitializer
8003   %mask1 = bitcast <16 x i1> %v1 to i16
8004   %val = zext i16 %mask1 to i32
8005   %val1 = add i32 %val, %val
8006   ret i32 %val1
8007}
8008
; Adds two i16 values reinterpreted as <16 x i1> vectors. Both scalars are
; moved into mask registers with kmovd and the add is expected to be emitted
; as kxorw before the result is moved back to %eax.
8009define i16 @test_v16i1_add(i16 %x, i16 %y) {
8010; GENERIC-LABEL: test_v16i1_add:
8011; GENERIC:       # %bb.0:
8012; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
8013; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
8014; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:0.33]
8015; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
8016; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
8017; GENERIC-NEXT:    retq # sched: [1:1.00]
8018;
8019; SKX-LABEL: test_v16i1_add:
8020; SKX:       # %bb.0:
8021; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
8022; SKX-NEXT:    kmovd %esi, %k1 # sched: [1:1.00]
8023; SKX-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
8024; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
8025; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
8026; SKX-NEXT:    retq # sched: [7:1.00]
8027  %m0 = bitcast i16 %x to <16 x i1>
8028  %m1 = bitcast i16 %y to <16 x i1>
8029  %m2 = add <16 x i1> %m0,  %m1
8030  %ret = bitcast <16 x i1> %m2 to i16
8031  ret i16 %ret
8032}
8033
; Subtracts two i16 values reinterpreted as <16 x i1> vectors. As with the
; add variant, the checked output uses kxorw for the operation.
8034define i16 @test_v16i1_sub(i16 %x, i16 %y) {
8035; GENERIC-LABEL: test_v16i1_sub:
8036; GENERIC:       # %bb.0:
8037; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
8038; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
8039; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:0.33]
8040; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
8041; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
8042; GENERIC-NEXT:    retq # sched: [1:1.00]
8043;
8044; SKX-LABEL: test_v16i1_sub:
8045; SKX:       # %bb.0:
8046; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
8047; SKX-NEXT:    kmovd %esi, %k1 # sched: [1:1.00]
8048; SKX-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
8049; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
8050; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
8051; SKX-NEXT:    retq # sched: [7:1.00]
8052  %m0 = bitcast i16 %x to <16 x i1>
8053  %m1 = bitcast i16 %y to <16 x i1>
8054  %m2 = sub <16 x i1> %m0,  %m1
8055  %ret = bitcast <16 x i1> %m2 to i16
8056  ret i16 %ret
8057}
8058
; Multiplies two i16 values reinterpreted as <16 x i1> vectors. The checked
; output performs the multiply with kandw on the two mask registers.
8059define i16 @test_v16i1_mul(i16 %x, i16 %y) {
8060; GENERIC-LABEL: test_v16i1_mul:
8061; GENERIC:       # %bb.0:
8062; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
8063; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
8064; GENERIC-NEXT:    kandw %k1, %k0, %k0 # sched: [1:0.33]
8065; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
8066; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
8067; GENERIC-NEXT:    retq # sched: [1:1.00]
8068;
8069; SKX-LABEL: test_v16i1_mul:
8070; SKX:       # %bb.0:
8071; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
8072; SKX-NEXT:    kmovd %esi, %k1 # sched: [1:1.00]
8073; SKX-NEXT:    kandw %k1, %k0, %k0 # sched: [1:1.00]
8074; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
8075; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
8076; SKX-NEXT:    retq # sched: [7:1.00]
8077  %m0 = bitcast i16 %x to <16 x i1>
8078  %m1 = bitcast i16 %y to <16 x i1>
8079  %m2 = mul <16 x i1> %m0,  %m1
8080  %ret = bitcast <16 x i1> %m2 to i16
8081  ret i16 %ret
8082}
8083
; Adds two i8 values reinterpreted as <8 x i1> vectors. The checked output
; uses the byte-sized mask instruction kxorb for the add.
8084define i8 @test_v8i1_add(i8 %x, i8 %y) {
8085; GENERIC-LABEL: test_v8i1_add:
8086; GENERIC:       # %bb.0:
8087; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
8088; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
8089; GENERIC-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:0.33]
8090; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
8091; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
8092; GENERIC-NEXT:    retq # sched: [1:1.00]
8093;
8094; SKX-LABEL: test_v8i1_add:
8095; SKX:       # %bb.0:
8096; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
8097; SKX-NEXT:    kmovd %esi, %k1 # sched: [1:1.00]
8098; SKX-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:1.00]
8099; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
8100; SKX-NEXT:    # kill: def $al killed $al killed $eax
8101; SKX-NEXT:    retq # sched: [7:1.00]
8102  %m0 = bitcast i8 %x to <8 x i1>
8103  %m1 = bitcast i8 %y to <8 x i1>
8104  %m2 = add <8 x i1> %m0,  %m1
8105  %ret = bitcast <8 x i1> %m2 to i8
8106  ret i8 %ret
8107}
8108
; Subtracts two i8 values reinterpreted as <8 x i1> vectors. The checked
; output is identical in shape to the add variant and uses kxorb.
8109define i8 @test_v8i1_sub(i8 %x, i8 %y) {
8110; GENERIC-LABEL: test_v8i1_sub:
8111; GENERIC:       # %bb.0:
8112; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
8113; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
8114; GENERIC-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:0.33]
8115; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
8116; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
8117; GENERIC-NEXT:    retq # sched: [1:1.00]
8118;
8119; SKX-LABEL: test_v8i1_sub:
8120; SKX:       # %bb.0:
8121; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
8122; SKX-NEXT:    kmovd %esi, %k1 # sched: [1:1.00]
8123; SKX-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:1.00]
8124; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
8125; SKX-NEXT:    # kill: def $al killed $al killed $eax
8126; SKX-NEXT:    retq # sched: [7:1.00]
8127  %m0 = bitcast i8 %x to <8 x i1>
8128  %m1 = bitcast i8 %y to <8 x i1>
8129  %m2 = sub <8 x i1> %m0,  %m1
8130  %ret = bitcast <8 x i1> %m2 to i8
8131  ret i8 %ret
8132}
8133
; Multiplies two i8 values reinterpreted as <8 x i1> vectors. The checked
; output performs the multiply with kandb on the two mask registers.
8134define i8 @test_v8i1_mul(i8 %x, i8 %y) {
8135; GENERIC-LABEL: test_v8i1_mul:
8136; GENERIC:       # %bb.0:
8137; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
8138; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
8139; GENERIC-NEXT:    kandb %k1, %k0, %k0 # sched: [1:0.33]
8140; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
8141; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
8142; GENERIC-NEXT:    retq # sched: [1:1.00]
8143;
8144; SKX-LABEL: test_v8i1_mul:
8145; SKX:       # %bb.0:
8146; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
8147; SKX-NEXT:    kmovd %esi, %k1 # sched: [1:1.00]
8148; SKX-NEXT:    kandb %k1, %k0, %k0 # sched: [1:1.00]
8149; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
8150; SKX-NEXT:    # kill: def $al killed $al killed $eax
8151; SKX-NEXT:    retq # sched: [7:1.00]
8152  %m0 = bitcast i8 %x to <8 x i1>
8153  %m1 = bitcast i8 %y to <8 x i1>
8154  %m2 = mul <8 x i1> %m0,  %m1
8155  %ret = bitcast <8 x i1> %m2 to i8
8156  ret i8 %ret
8157}
8158
; Splats an i32 scalar argument into all 16 lanes (insertelement into lane 0
; plus a zero-mask shufflevector). Expected to become a single vpbroadcastd
; from the general-purpose register %edi.
8159define   <16 x i32> @_inreg16xi32(i32 %a) {
8160; GENERIC-LABEL: _inreg16xi32:
8161; GENERIC:       # %bb.0:
8162; GENERIC-NEXT:    vpbroadcastd %edi, %zmm0 # sched: [1:1.00]
8163; GENERIC-NEXT:    retq # sched: [1:1.00]
8164;
8165; SKX-LABEL: _inreg16xi32:
8166; SKX:       # %bb.0:
8167; SKX-NEXT:    vpbroadcastd %edi, %zmm0 # sched: [3:1.00]
8168; SKX-NEXT:    retq # sched: [7:1.00]
8169  %b = insertelement <16 x i32> undef, i32 %a, i32 0
8170  %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
8171  ret <16 x i32> %c
8172}
8173
; Splats an i64 scalar argument into all 8 lanes. Expected to become a single
; vpbroadcastq from the general-purpose register %rdi.
8174define   <8 x i64> @_inreg8xi64(i64 %a) {
8175; GENERIC-LABEL: _inreg8xi64:
8176; GENERIC:       # %bb.0:
8177; GENERIC-NEXT:    vpbroadcastq %rdi, %zmm0 # sched: [1:1.00]
8178; GENERIC-NEXT:    retq # sched: [1:1.00]
8179;
8180; SKX-LABEL: _inreg8xi64:
8181; SKX:       # %bb.0:
8182; SKX-NEXT:    vpbroadcastq %rdi, %zmm0 # sched: [3:1.00]
8183; SKX-NEXT:    retq # sched: [7:1.00]
8184  %b = insertelement <8 x i64> undef, i64 %a, i32 0
8185  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
8186  ret <8 x i64> %c
8187}
8188
; Broadcasts lane 0 of a <4 x float> argument to a <16 x float> result with a
; zero-mask shufflevector. Expected to become a single vbroadcastss from %xmm0.
8189define   <16 x float> @_ss16xfloat_v4(<4 x float> %a) {
8190; GENERIC-LABEL: _ss16xfloat_v4:
8191; GENERIC:       # %bb.0:
8192; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
8193; GENERIC-NEXT:    retq # sched: [1:1.00]
8194;
8195; SKX-LABEL: _ss16xfloat_v4:
8196; SKX:       # %bb.0:
8197; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
8198; SKX-NEXT:    retq # sched: [7:1.00]
8199  %b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer
8200  ret <16 x float> %b
8201}
8202
; Splats a float scalar argument into all 16 lanes. Expected to become a
; single register-to-register vbroadcastss.
8203define   <16 x float> @_inreg16xfloat(float %a) {
8204; GENERIC-LABEL: _inreg16xfloat:
8205; GENERIC:       # %bb.0:
8206; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
8207; GENERIC-NEXT:    retq # sched: [1:1.00]
8208;
8209; SKX-LABEL: _inreg16xfloat:
8210; SKX:       # %bb.0:
8211; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
8212; SKX-NEXT:    retq # sched: [7:1.00]
8213  %b = insertelement <16 x float> undef, float %a, i32 0
8214  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
8215  ret <16 x float> %c
8216}
8217
; Merge-masked float splat: lanes where %mask1 is non-zero take the broadcast
; of %a, the others keep %i. The checks expect vptestmd to build %k1, a
; vbroadcastss into %zmm1 under {%k1}, and a vmovaps copying %zmm1 to %zmm0.
8218define   <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) {
8219; GENERIC-LABEL: _ss16xfloat_mask:
8220; GENERIC:       # %bb.0:
8221; GENERIC-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
8222; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1} # sched: [1:1.00]
8223; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
8224; GENERIC-NEXT:    retq # sched: [1:1.00]
8225;
8226; SKX-LABEL: _ss16xfloat_mask:
8227; SKX:       # %bb.0:
8228; SKX-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
8229; SKX-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1} # sched: [3:1.00]
8230; SKX-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:0.33]
8231; SKX-NEXT:    retq # sched: [7:1.00]
8232  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
8233  %b = insertelement <16 x float> undef, float %a, i32 0
8234  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
8235  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
8236  ret <16 x float> %r
8237}
8238
; Zero-masked float splat: lanes where %mask1 is zero are set to 0.0. The
; checks expect vptestmd followed by vbroadcastss with {%k1} {z}.
8239define   <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
8240; GENERIC-LABEL: _ss16xfloat_maskz:
8241; GENERIC:       # %bb.0:
8242; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
8243; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
8244; GENERIC-NEXT:    retq # sched: [1:1.00]
8245;
8246; SKX-LABEL: _ss16xfloat_maskz:
8247; SKX:       # %bb.0:
8248; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
8249; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
8250; SKX-NEXT:    retq # sched: [7:1.00]
8251  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
8252  %b = insertelement <16 x float> undef, float %a, i32 0
8253  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
8254  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
8255  ret <16 x float> %r
8256}
8257
; Loads a float through %a.ptr and splats it into 16 lanes. The load is
; expected to be folded into the memory form of vbroadcastss.
8258define   <16 x float> @_ss16xfloat_load(float* %a.ptr) {
8259; GENERIC-LABEL: _ss16xfloat_load:
8260; GENERIC:       # %bb.0:
8261; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 # sched: [8:1.00]
8262; GENERIC-NEXT:    retq # sched: [1:1.00]
8263;
8264; SKX-LABEL: _ss16xfloat_load:
8265; SKX:       # %bb.0:
8266; SKX-NEXT:    vbroadcastss (%rdi), %zmm0 # sched: [8:0.50]
8267; SKX-NEXT:    retq # sched: [7:1.00]
8268  %a = load float, float* %a.ptr
8269  %b = insertelement <16 x float> undef, float %a, i32 0
8270  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
8271  ret <16 x float> %c
8272}
8273
; Merge-masked splat of a loaded float: lanes where %mask1 is zero keep %i.
; The checks expect vptestmd to build %k1 and the load folded into a
; vbroadcastss (%rdi) that writes %zmm0 under {%k1}.
8274define   <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
8275; GENERIC-LABEL: _ss16xfloat_mask_load:
8276; GENERIC:       # %bb.0:
8277; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
8278; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:1.00]
8279; GENERIC-NEXT:    retq # sched: [1:1.00]
8280;
8281; SKX-LABEL: _ss16xfloat_mask_load:
8282; SKX:       # %bb.0:
8283; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
8284; SKX-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:0.50]
8285; SKX-NEXT:    retq # sched: [7:1.00]
8286  %a = load float, float* %a.ptr
8287  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
8288  %b = insertelement <16 x float> undef, float %a, i32 0
8289  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
8290  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
8291  ret <16 x float> %r
8292}
8293
; Zero-masked splat of a loaded float. The checks expect vptestmd followed by
; the load folded into vbroadcastss (%rdi) with {%k1} {z}.
8294define   <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
8295; GENERIC-LABEL: _ss16xfloat_maskz_load:
8296; GENERIC:       # %bb.0:
8297; GENERIC-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
8298; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
8299; GENERIC-NEXT:    retq # sched: [1:1.00]
8300;
8301; SKX-LABEL: _ss16xfloat_maskz_load:
8302; SKX:       # %bb.0:
8303; SKX-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
8304; SKX-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
8305; SKX-NEXT:    retq # sched: [7:1.00]
8306  %a = load float, float* %a.ptr
8307  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
8308  %b = insertelement <16 x float> undef, float %a, i32 0
8309  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
8310  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
8311  ret <16 x float> %r
8312}
8313
; Splats a double scalar argument into all 8 lanes. Expected to become a
; single register-to-register vbroadcastsd.
8314define   <8 x double> @_inreg8xdouble(double %a) {
8315; GENERIC-LABEL: _inreg8xdouble:
8316; GENERIC:       # %bb.0:
8317; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
8318; GENERIC-NEXT:    retq # sched: [1:1.00]
8319;
8320; SKX-LABEL: _inreg8xdouble:
8321; SKX:       # %bb.0:
8322; SKX-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
8323; SKX-NEXT:    retq # sched: [7:1.00]
8324  %b = insertelement <8 x double> undef, double %a, i32 0
8325  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
8326  ret <8 x double> %c
8327}
8328
; Merge-masked double splat driven by a <8 x i32> mask source: lanes where
; %mask1 is zero keep %i. The checks expect a ymm-width vptestmd, a
; vbroadcastsd into %zmm1 under {%k1}, and a vmovapd copying %zmm1 to %zmm0.
8329define   <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) {
8330; GENERIC-LABEL: _sd8xdouble_mask:
8331; GENERIC:       # %bb.0:
8332; GENERIC-NEXT:    vptestmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
8333; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [1:1.00]
8334; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
8335; GENERIC-NEXT:    retq # sched: [1:1.00]
8336;
8337; SKX-LABEL: _sd8xdouble_mask:
8338; SKX:       # %bb.0:
8339; SKX-NEXT:    vptestmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
8340; SKX-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [3:1.00]
8341; SKX-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:0.33]
8342; SKX-NEXT:    retq # sched: [7:1.00]
8343  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
8344  %b = insertelement <8 x double> undef, double %a, i32 0
8345  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
8346  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
8347  ret <8 x double> %r
8348}
8349
; Zero-masked double splat: lanes where %mask1 is zero are set to 0.0. The
; checks expect a ymm-width vptestmd and vbroadcastsd with {%k1} {z}.
8350define   <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
8351; GENERIC-LABEL: _sd8xdouble_maskz:
8352; GENERIC:       # %bb.0:
8353; GENERIC-NEXT:    vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
8354; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
8355; GENERIC-NEXT:    retq # sched: [1:1.00]
8356;
8357; SKX-LABEL: _sd8xdouble_maskz:
8358; SKX:       # %bb.0:
8359; SKX-NEXT:    vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
8360; SKX-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
8361; SKX-NEXT:    retq # sched: [7:1.00]
8362  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
8363  %b = insertelement <8 x double> undef, double %a, i32 0
8364  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
8365  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
8366  ret <8 x double> %r
8367}
8368
; Loads a double through %a.ptr and splats it into 8 lanes. The load is
; expected to be folded into the memory form of vbroadcastsd.
8369define   <8 x double> @_sd8xdouble_load(double* %a.ptr) {
8370; GENERIC-LABEL: _sd8xdouble_load:
8371; GENERIC:       # %bb.0:
8372; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 # sched: [8:1.00]
8373; GENERIC-NEXT:    retq # sched: [1:1.00]
8374;
8375; SKX-LABEL: _sd8xdouble_load:
8376; SKX:       # %bb.0:
8377; SKX-NEXT:    vbroadcastsd (%rdi), %zmm0 # sched: [8:0.50]
8378; SKX-NEXT:    retq # sched: [7:1.00]
8379  %a = load double, double* %a.ptr
8380  %b = insertelement <8 x double> undef, double %a, i32 0
8381  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
8382  ret <8 x double> %c
8383}
8384
; Merge-masked splat of a loaded double: lanes where %mask1 is zero keep %i.
; The checks expect a ymm-width vptestmd and the load folded into a
; vbroadcastsd (%rdi) that writes %zmm0 under {%k1}.
8385define   <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
8386; GENERIC-LABEL: _sd8xdouble_mask_load:
8387; GENERIC:       # %bb.0:
8388; GENERIC-NEXT:    vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
8389; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:1.00]
8390; GENERIC-NEXT:    retq # sched: [1:1.00]
8391;
8392; SKX-LABEL: _sd8xdouble_mask_load:
8393; SKX:       # %bb.0:
8394; SKX-NEXT:    vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
8395; SKX-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
8396; SKX-NEXT:    retq # sched: [7:1.00]
8397  %a = load double, double* %a.ptr
8398  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
8399  %b = insertelement <8 x double> undef, double %a, i32 0
8400  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
8401  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
8402  ret <8 x double> %r
8403}
8404
; Zero-masked splat of a loaded double. The checks expect a ymm-width
; vptestmd and the load folded into vbroadcastsd (%rdi) with {%k1} {z}.
8405define   <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) {
8406; GENERIC-LABEL: _sd8xdouble_maskz_load:
8407; GENERIC:       # %bb.0:
8408; GENERIC-NEXT:    vptestmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
8409; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
8410; GENERIC-NEXT:    retq # sched: [1:1.00]
8411;
8412; SKX-LABEL: _sd8xdouble_maskz_load:
8413; SKX:       # %bb.0:
8414; SKX-NEXT:    vptestmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
8415; SKX-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
8416; SKX-NEXT:    retq # sched: [7:1.00]
8417  %a = load double, double* %a.ptr
8418  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
8419  %b = insertelement <8 x double> undef, double %a, i32 0
8420  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
8421  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
8422  ret <8 x double> %r
8423}
8424
; Broadcasts lane 0 of a <16 x i32> vector to every lane. Note that the
; checked output uses vbroadcastss even though the element type is integer.
8425define   <16 x i32> @_xmm16xi32(<16 x i32> %a) {
8426; GENERIC-LABEL: _xmm16xi32:
8427; GENERIC:       # %bb.0:
8428; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
8429; GENERIC-NEXT:    retq # sched: [1:1.00]
8430;
8431; SKX-LABEL: _xmm16xi32:
8432; SKX:       # %bb.0:
8433; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
8434; SKX-NEXT:    retq # sched: [7:1.00]
8435  %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
8436  ret <16 x i32> %b
8437}
8438
; Broadcasts lane 0 of a <16 x float> vector to every lane; expected to become
; a single vbroadcastss reading the low xmm part of the input.
8439define   <16 x float> @_xmm16xfloat(<16 x float> %a) {
8440; GENERIC-LABEL: _xmm16xfloat:
8441; GENERIC:       # %bb.0:
8442; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
8443; GENERIC-NEXT:    retq # sched: [1:1.00]
8444;
8445; SKX-LABEL: _xmm16xfloat:
8446; SKX:       # %bb.0:
8447; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
8448; SKX-NEXT:    retq # sched: [7:1.00]
8449  %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer
8450  ret <16 x float> %b
8451}
8452
; Takes no arguments: performs an unordered fcmp of an undef <16 x float>
; against zero, sign-extends the mask and selects between a sign-extended
; all-false mask and that value. The checked output zeroes %xmm0, compares it
; with itself (vcmpunordps), expands the mask with vpmovm2d, inverts it with
; knotw and applies a zero-masked vmovdqa32.
8453define <16 x i32> @test_vbroadcast() {
8454; GENERIC-LABEL: test_vbroadcast:
8455; GENERIC:       # %bb.0: # %entry
8456; GENERIC-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
8457; GENERIC-NEXT:    vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
8458; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
8459; GENERIC-NEXT:    knotw %k0, %k1 # sched: [1:0.33]
8460; GENERIC-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
8461; GENERIC-NEXT:    retq # sched: [1:1.00]
8462;
8463; SKX-LABEL: test_vbroadcast:
8464; SKX:       # %bb.0: # %entry
8465; SKX-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
8466; SKX-NEXT:    vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
8467; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
8468; SKX-NEXT:    knotw %k0, %k1 # sched: [1:1.00]
8469; SKX-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
8470; SKX-NEXT:    retq # sched: [7:1.00]
8471entry:
8472  %0 = sext <16 x i1> zeroinitializer to <16 x i32>
8473  %1 = fcmp uno <16 x float> undef, zeroinitializer
8474  %2 = sext <16 x i1> %1 to <16 x i32>
8475  %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2
8476  ret <16 x i32> %3
8477}
8478
8479; We implement the set1 intrinsics with vector initializers.  Verify that the
8480; IR generated will produce broadcasts at the end.
; Builds the splat of %d with eight chained insertelement instructions, one
; per lane (0-7). The whole chain is expected to collapse into one
; vbroadcastsd. Attribute group #2 is defined outside this part of the file.
8481define <8 x double> @test_set1_pd(double %d) #2 {
8482; GENERIC-LABEL: test_set1_pd:
8483; GENERIC:       # %bb.0: # %entry
8484; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
8485; GENERIC-NEXT:    retq # sched: [1:1.00]
8486;
8487; SKX-LABEL: test_set1_pd:
8488; SKX:       # %bb.0: # %entry
8489; SKX-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
8490; SKX-NEXT:    retq # sched: [7:1.00]
8491entry:
8492  %vecinit.i = insertelement <8 x double> undef, double %d, i32 0
8493  %vecinit1.i = insertelement <8 x double> %vecinit.i, double %d, i32 1
8494  %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %d, i32 2
8495  %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %d, i32 3
8496  %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %d, i32 4
8497  %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %d, i32 5
8498  %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %d, i32 6
8499  %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %d, i32 7
8500  ret <8 x double> %vecinit7.i
8501}
8502
; Builds the splat of the i64 %d with eight chained insertelement
; instructions (lanes 0-7). The chain is expected to collapse into one
; vpbroadcastq from %rdi. Attribute group #2 is defined outside this part of
; the file.
8503define <8 x i64> @test_set1_epi64(i64 %d) #2 {
8504; GENERIC-LABEL: test_set1_epi64:
8505; GENERIC:       # %bb.0: # %entry
8506; GENERIC-NEXT:    vpbroadcastq %rdi, %zmm0 # sched: [1:1.00]
8507; GENERIC-NEXT:    retq # sched: [1:1.00]
8508;
8509; SKX-LABEL: test_set1_epi64:
8510; SKX:       # %bb.0: # %entry
8511; SKX-NEXT:    vpbroadcastq %rdi, %zmm0 # sched: [3:1.00]
8512; SKX-NEXT:    retq # sched: [7:1.00]
8513entry:
8514  %vecinit.i = insertelement <8 x i64> undef, i64 %d, i32 0
8515  %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %d, i32 1
8516  %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %d, i32 2
8517  %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %d, i32 3
8518  %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %d, i32 4
8519  %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %d, i32 5
8520  %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %d, i32 6
8521  %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %d, i32 7
8522  ret <8 x i64> %vecinit7.i
8523}
8524
; Builds the splat of the float %f with sixteen chained insertelement
; instructions (lanes 0-15). The chain is expected to collapse into one
; vbroadcastss. Attribute group #2 is defined outside this part of the file.
8525define <16 x float> @test_set1_ps(float %f) #2 {
8526; GENERIC-LABEL: test_set1_ps:
8527; GENERIC:       # %bb.0: # %entry
8528; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
8529; GENERIC-NEXT:    retq # sched: [1:1.00]
8530;
8531; SKX-LABEL: test_set1_ps:
8532; SKX:       # %bb.0: # %entry
8533; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
8534; SKX-NEXT:    retq # sched: [7:1.00]
8535entry:
8536  %vecinit.i = insertelement <16 x float> undef, float %f, i32 0
8537  %vecinit1.i = insertelement <16 x float> %vecinit.i, float %f, i32 1
8538  %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %f, i32 2
8539  %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %f, i32 3
8540  %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %f, i32 4
8541  %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %f, i32 5
8542  %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %f, i32 6
8543  %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %f, i32 7
8544  %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %f, i32 8
8545  %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %f, i32 9
8546  %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %f, i32 10
8547  %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %f, i32 11
8548  %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %f, i32 12
8549  %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %f, i32 13
8550  %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %f, i32 14
8551  %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %f, i32 15
8552  ret <16 x float> %vecinit15.i
8553}
8554
; Builds the splat of the i32 %f with sixteen chained insertelement
; instructions (lanes 0-15). The chain is expected to collapse into one
; vpbroadcastd from %edi. Attribute group #2 is defined outside this part of
; the file.
8555define <16 x i32> @test_set1_epi32(i32 %f) #2 {
8556; GENERIC-LABEL: test_set1_epi32:
8557; GENERIC:       # %bb.0: # %entry
8558; GENERIC-NEXT:    vpbroadcastd %edi, %zmm0 # sched: [1:1.00]
8559; GENERIC-NEXT:    retq # sched: [1:1.00]
8560;
8561; SKX-LABEL: test_set1_epi32:
8562; SKX:       # %bb.0: # %entry
8563; SKX-NEXT:    vpbroadcastd %edi, %zmm0 # sched: [3:1.00]
8564; SKX-NEXT:    retq # sched: [7:1.00]
8565entry:
8566  %vecinit.i = insertelement <16 x i32> undef, i32 %f, i32 0
8567  %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %f, i32 1
8568  %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %f, i32 2
8569  %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %f, i32 3
8570  %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %f, i32 4
8571  %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %f, i32 5
8572  %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %f, i32 6
8573  %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %f, i32 7
8574  %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %f, i32 8
8575  %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %f, i32 9
8576  %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %f, i32 10
8577  %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %f, i32 11
8578  %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %f, i32 12
8579  %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %f, i32 13
8580  %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %f, i32 14
8581  %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %f, i32 15
8582  ret <16 x i32> %vecinit15.i
8583}
8584
8585; We implement the scalar broadcast intrinsics with vector initializers.
8586; Verify that the IR generated will produce the broadcast at the end.
; Extracts lane 0 of a <2 x double> argument and inserts it into each of the
; eight result lanes with chained insertelement instructions. Expected to
; collapse into a single vbroadcastsd from %xmm0.
8587define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) {
8588; GENERIC-LABEL: test_mm512_broadcastsd_pd:
8589; GENERIC:       # %bb.0: # %entry
8590; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
8591; GENERIC-NEXT:    retq # sched: [1:1.00]
8592;
8593; SKX-LABEL: test_mm512_broadcastsd_pd:
8594; SKX:       # %bb.0: # %entry
8595; SKX-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
8596; SKX-NEXT:    retq # sched: [7:1.00]
8597entry:
8598  %0 = extractelement <2 x double> %a, i32 0
8599  %vecinit.i = insertelement <8 x double> undef, double %0, i32 0
8600  %vecinit1.i = insertelement <8 x double> %vecinit.i, double %0, i32 1
8601  %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %0, i32 2
8602  %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %0, i32 3
8603  %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %0, i32 4
8604  %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %0, i32 5
8605  %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %0, i32 6
8606  %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %0, i32 7
8607  ret <8 x double> %vecinit7.i
8608}
8609
; Widening broadcast: a zero-mask shufflevector turns lane 0 of a
; <8 x float> input into a <16 x float> splat. Expected to be a single
; vbroadcastss.
8610define <16 x float> @suff_test1(<8 x float>%a)  {
8611; GENERIC-LABEL: suff_test1:
8612; GENERIC:       # %bb.0:
8613; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
8614; GENERIC-NEXT:    retq # sched: [1:1.00]
8615;
8616; SKX-LABEL: suff_test1:
8617; SKX:       # %bb.0:
8618; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
8619; SKX-NEXT:    retq # sched: [7:1.00]
8620  %res = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> zeroinitializer
8621  ret <16 x float>%res
8622}
8623
; Widening broadcast: a zero-mask shufflevector turns lane 0 of a
; <4 x double> input into a <8 x double> splat. Expected to be a single
; vbroadcastsd.
8624define <8 x double> @suff_test2(<4 x double>%a)  {
8625; GENERIC-LABEL: suff_test2:
8626; GENERIC:       # %bb.0:
8627; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
8628; GENERIC-NEXT:    retq # sched: [1:1.00]
8629;
8630; SKX-LABEL: suff_test2:
8631; SKX:       # %bb.0:
8632; SKX-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
8633; SKX-NEXT:    retq # sched: [7:1.00]
8634  %res = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> zeroinitializer
8635  ret <8 x double>%res
8636}
8637
; Widening byte broadcast: lane 0 of a <32 x i8> input is splatted into a
; <64 x i8> result. Expected to be a single vpbroadcastb.
8638define <64 x i8> @_invec32xi8(<32 x i8>%a)  {
8639; GENERIC-LABEL: _invec32xi8:
8640; GENERIC:       # %bb.0:
8641; GENERIC-NEXT:    vpbroadcastb %xmm0, %zmm0 # sched: [1:1.00]
8642; GENERIC-NEXT:    retq # sched: [1:1.00]
8643;
8644; SKX-LABEL: _invec32xi8:
8645; SKX:       # %bb.0:
8646; SKX-NEXT:    vpbroadcastb %xmm0, %zmm0 # sched: [3:1.00]
8647; SKX-NEXT:    retq # sched: [7:1.00]
8648  %res = shufflevector <32 x i8> %a, <32 x i8> undef, <64 x i32> zeroinitializer
8649  ret <64 x i8>%res
8650}
8651
; Widening word broadcast: lane 0 of a <16 x i16> input is splatted into a
; <32 x i16> result. Expected to be a single vpbroadcastw.
8652define <32 x i16> @_invec16xi16(<16 x i16>%a)  {
8653; GENERIC-LABEL: _invec16xi16:
8654; GENERIC:       # %bb.0:
8655; GENERIC-NEXT:    vpbroadcastw %xmm0, %zmm0 # sched: [1:1.00]
8656; GENERIC-NEXT:    retq # sched: [1:1.00]
8657;
8658; SKX-LABEL: _invec16xi16:
8659; SKX:       # %bb.0:
8660; SKX-NEXT:    vpbroadcastw %xmm0, %zmm0 # sched: [3:1.00]
8661; SKX-NEXT:    retq # sched: [7:1.00]
8662  %res = shufflevector <16 x i16> %a, <16 x i16> undef, <32 x i32> zeroinitializer
8663  ret <32 x i16>%res
8664}
8665
; Widening dword broadcast: lane 0 of a <8 x i32> input is splatted into a
; <16 x i32> result. The checked output uses vbroadcastss for this integer
; splat.
8666define <16 x i32> @_invec8xi32(<8 x i32>%a)  {
8667; GENERIC-LABEL: _invec8xi32:
8668; GENERIC:       # %bb.0:
8669; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
8670; GENERIC-NEXT:    retq # sched: [1:1.00]
8671;
8672; SKX-LABEL: _invec8xi32:
8673; SKX:       # %bb.0:
8674; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
8675; SKX-NEXT:    retq # sched: [7:1.00]
8676  %res = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> zeroinitializer
8677  ret <16 x i32>%res
8678}
8679
; _invec4xi64: splat qword 0 of a <4 x i64> input across a <8 x i64> result.
; Note that the expected instruction in both RUN configurations is vbroadcastsd
; even though the element type here is i64, not double. Only the "# sched:"
; annotations differ between the two configurations.
8680define <8 x i64> @_invec4xi64(<4 x i64>%a)  {
8681; GENERIC-LABEL: _invec4xi64:
8682; GENERIC:       # %bb.0:
8683; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
8684; GENERIC-NEXT:    retq # sched: [1:1.00]
8685;
8686; SKX-LABEL: _invec4xi64:
8687; SKX:       # %bb.0:
8688; SKX-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
8689; SKX-NEXT:    retq # sched: [7:1.00]
; All 8 mask entries are zero, so every result lane reads element 0 of %a.
8690  %res = shufflevector <4 x i64> %a, <4 x i64> undef, <8 x i32> zeroinitializer
8691  ret <8 x i64>%res
8692}
8693
; External callee used by broadcast_ss_spill; only a declaration appears here.
8694declare void @func_f32(float)
; broadcast_ss_spill: compute %a = %x + %x, pass it to @func_f32, then splat %a
; across a <16 x float>. %a is still needed after the call, and the expected
; code in both RUN configurations stores it to (%rsp) before the call
; ("16-byte Spill") and afterwards broadcasts straight from that stack slot
; ("16-byte Folded Reload") instead of first reloading it into a register.
; The instruction sequence is the same for both configurations; only the
; "# sched:" annotations differ, and the callq line carries none.
8695define <16 x float> @broadcast_ss_spill(float %x) {
8696; GENERIC-LABEL: broadcast_ss_spill:
8697; GENERIC:       # %bb.0:
8698; GENERIC-NEXT:    subq $24, %rsp # sched: [1:0.33]
8699; GENERIC-NEXT:    .cfi_def_cfa_offset 32
8700; GENERIC-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
8701; GENERIC-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
8702; GENERIC-NEXT:    callq func_f32
8703; GENERIC-NEXT:    vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00]
8704; GENERIC-NEXT:    addq $24, %rsp # sched: [1:0.33]
8705; GENERIC-NEXT:    .cfi_def_cfa_offset 8
8706; GENERIC-NEXT:    retq # sched: [1:1.00]
8707;
8708; SKX-LABEL: broadcast_ss_spill:
8709; SKX:       # %bb.0:
8710; SKX-NEXT:    subq $24, %rsp # sched: [1:0.25]
8711; SKX-NEXT:    .cfi_def_cfa_offset 32
8712; SKX-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
8713; SKX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
8714; SKX-NEXT:    callq func_f32
8715; SKX-NEXT:    vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50]
8716; SKX-NEXT:    addq $24, %rsp # sched: [1:0.25]
8717; SKX-NEXT:    .cfi_def_cfa_offset 8
8718; SKX-NEXT:    retq # sched: [7:1.00]
8719  %a  = fadd float %x, %x
; %a is both the call argument and the value splatted below, so it has to
; survive the call.
8720  call void @func_f32(float %a)
; Put %a in lane 0 of an otherwise-undef vector, then replicate lane 0 into
; all 16 lanes with an all-zero shuffle mask.
8721  %b = insertelement <16 x float> undef, float %a, i32 0
8722  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
8723  ret <16 x float> %c
8724}
8725
; External callee used by broadcast_sd_spill; only a declaration appears here.
8726declare void @func_f64(double)
; broadcast_sd_spill: compute %a = %x + %x, pass it to @func_f64, then splat %a
; across a <8 x double>. %a is still needed after the call, and the expected
; code in both RUN configurations stores it to (%rsp) before the call
; ("16-byte Spill") and afterwards broadcasts straight from that stack slot
; ("16-byte Folded Reload") instead of first reloading it into a register.
; The instruction sequence is the same for both configurations; only the
; "# sched:" annotations differ, and the callq line carries none.
8727define <8 x double> @broadcast_sd_spill(double %x) {
8728; GENERIC-LABEL: broadcast_sd_spill:
8729; GENERIC:       # %bb.0:
8730; GENERIC-NEXT:    subq $24, %rsp # sched: [1:0.33]
8731; GENERIC-NEXT:    .cfi_def_cfa_offset 32
8732; GENERIC-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
8733; GENERIC-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
8734; GENERIC-NEXT:    callq func_f64
8735; GENERIC-NEXT:    vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00]
8736; GENERIC-NEXT:    addq $24, %rsp # sched: [1:0.33]
8737; GENERIC-NEXT:    .cfi_def_cfa_offset 8
8738; GENERIC-NEXT:    retq # sched: [1:1.00]
8739;
8740; SKX-LABEL: broadcast_sd_spill:
8741; SKX:       # %bb.0:
8742; SKX-NEXT:    subq $24, %rsp # sched: [1:0.25]
8743; SKX-NEXT:    .cfi_def_cfa_offset 32
8744; SKX-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
8745; SKX-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
8746; SKX-NEXT:    callq func_f64
8747; SKX-NEXT:    vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50]
8748; SKX-NEXT:    addq $24, %rsp # sched: [1:0.25]
8749; SKX-NEXT:    .cfi_def_cfa_offset 8
8750; SKX-NEXT:    retq # sched: [7:1.00]
8751  %a  = fadd double %x, %x
; %a is both the call argument and the value splatted below, so it has to
; survive the call.
8752  call void @func_f64(double %a)
; Put %a in lane 0 of an otherwise-undef vector, then replicate lane 0 into
; all 8 lanes with an all-zero shuffle mask.
8753  %b = insertelement <8 x double> undef, double %a, i32 0
8754  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
8755  ret <8 x double> %c
8756}
8757