1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl  | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq  | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw  | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq  | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw  | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW
9
10
; sitofp <16 x i32> -> <16 x float>: one vcvtdq2ps on all AVX-512 configs.
11define <16 x float> @sitof32(<16 x i32> %a) nounwind {
12; ALL-LABEL: sitof32:
13; ALL:       # %bb.0:
14; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
15; ALL-NEXT:    retq
16  %b = sitofp <16 x i32> %a to <16 x float>
17  ret <16 x float> %b
18}
19
; sitofp <8 x i64> -> <8 x double>: without DQ this is scalarized through
; vpextrq/vcvtsi2sdq and reassembled; with DQ it is a single vcvtqq2pd.
20define <8 x double> @sltof864(<8 x i64> %a) {
21; NODQ-LABEL: sltof864:
22; NODQ:       # %bb.0:
23; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
24; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
25; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
26; NODQ-NEXT:    vmovq %xmm1, %rax
27; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
28; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
29; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
30; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
31; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm3
32; NODQ-NEXT:    vmovq %xmm2, %rax
33; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
34; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
35; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
36; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm2
37; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
38; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm3
39; NODQ-NEXT:    vmovq %xmm2, %rax
40; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
41; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
42; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
43; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm3
44; NODQ-NEXT:    vmovq %xmm0, %rax
45; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm0
46; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
47; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
48; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
49; NODQ-NEXT:    retq
50;
51; VLDQ-LABEL: sltof864:
52; VLDQ:       # %bb.0:
53; VLDQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
54; VLDQ-NEXT:    retq
55;
56; DQNOVL-LABEL: sltof864:
57; DQNOVL:       # %bb.0:
58; DQNOVL-NEXT:    vcvtqq2pd %zmm0, %zmm0
59; DQNOVL-NEXT:    retq
60  %b = sitofp <8 x i64> %a to <8 x double>
61  ret <8 x double> %b
62}
63
; sitofp <4 x i64> -> <4 x double>: scalarized without DQ; vcvtqq2pd with DQ
; (DQ-without-VL widens to zmm, hence the kill markers).
64define <4 x double> @slto4f64(<4 x i64> %a) {
65; NODQ-LABEL: slto4f64:
66; NODQ:       # %bb.0:
67; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm1
68; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
69; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
70; NODQ-NEXT:    vmovq %xmm1, %rax
71; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
72; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
73; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
74; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm2
75; NODQ-NEXT:    vmovq %xmm0, %rax
76; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm0
77; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
78; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
79; NODQ-NEXT:    retq
80;
81; VLDQ-LABEL: slto4f64:
82; VLDQ:       # %bb.0:
83; VLDQ-NEXT:    vcvtqq2pd %ymm0, %ymm0
84; VLDQ-NEXT:    retq
85;
86; DQNOVL-LABEL: slto4f64:
87; DQNOVL:       # %bb.0:
88; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
89; DQNOVL-NEXT:    vcvtqq2pd %zmm0, %zmm0
90; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
91; DQNOVL-NEXT:    retq
92  %b = sitofp <4 x i64> %a to <4 x double>
93  ret <4 x double> %b
94}
95
; sitofp <2 x i64> -> <2 x double>: scalarized without DQ; xmm vcvtqq2pd with
; VL+DQ; widened-to-zmm vcvtqq2pd when DQ is present without VL.
96define <2 x double> @slto2f64(<2 x i64> %a) {
97; NODQ-LABEL: slto2f64:
98; NODQ:       # %bb.0:
99; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
100; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm1, %xmm1
101; NODQ-NEXT:    vmovq %xmm0, %rax
102; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm0
103; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
104; NODQ-NEXT:    retq
105;
106; VLDQ-LABEL: slto2f64:
107; VLDQ:       # %bb.0:
108; VLDQ-NEXT:    vcvtqq2pd %xmm0, %xmm0
109; VLDQ-NEXT:    retq
110;
111; DQNOVL-LABEL: slto2f64:
112; DQNOVL:       # %bb.0:
113; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
114; DQNOVL-NEXT:    vcvtqq2pd %zmm0, %zmm0
115; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
116; DQNOVL-NEXT:    vzeroupper
117; DQNOVL-NEXT:    retq
118  %b = sitofp <2 x i64> %a to <2 x double>
119  ret <2 x double> %b
120}
121
; sitofp <2 x i64> -> <2 x float>: scalar conversions + shuffles without DQ;
; vcvtqq2ps with DQ.
122define <2 x float> @sltof2f32(<2 x i64> %a) {
123; NODQ-LABEL: sltof2f32:
124; NODQ:       # %bb.0:
125; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
126; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
127; NODQ-NEXT:    vmovq %xmm0, %rax
128; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm0
129; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
130; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm1
131; NODQ-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
132; NODQ-NEXT:    retq
133;
134; VLDQ-LABEL: sltof2f32:
135; VLDQ:       # %bb.0:
136; VLDQ-NEXT:    vcvtqq2ps %xmm0, %xmm0
137; VLDQ-NEXT:    retq
138;
139; DQNOVL-LABEL: sltof2f32:
140; DQNOVL:       # %bb.0:
141; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
142; DQNOVL-NEXT:    vcvtqq2ps %zmm0, %ymm0
143; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
144; DQNOVL-NEXT:    vzeroupper
145; DQNOVL-NEXT:    retq
146  %b = sitofp <2 x i64> %a to <2 x float>
147  ret <2 x float>%b
148}
149
; Load <4 x i64> then sitofp to <4 x float>: VL+DQ folds the load into
; vcvtqq2psy (%rdi); other configs load then convert.
150define <4 x float> @slto4f32_mem(<4 x i64>* %a) {
151; NODQ-LABEL: slto4f32_mem:
152; NODQ:       # %bb.0:
153; NODQ-NEXT:    vmovdqu (%rdi), %ymm0
154; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
155; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
156; NODQ-NEXT:    vmovq %xmm0, %rax
157; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm2
158; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
159; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
160; NODQ-NEXT:    vmovq %xmm0, %rax
161; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm2
162; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
163; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
164; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm0
165; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
166; NODQ-NEXT:    vzeroupper
167; NODQ-NEXT:    retq
168;
169; VLDQ-LABEL: slto4f32_mem:
170; VLDQ:       # %bb.0:
171; VLDQ-NEXT:    vcvtqq2psy (%rdi), %xmm0
172; VLDQ-NEXT:    retq
173;
174; DQNOVL-LABEL: slto4f32_mem:
175; DQNOVL:       # %bb.0:
176; DQNOVL-NEXT:    vmovups (%rdi), %ymm0
177; DQNOVL-NEXT:    vcvtqq2ps %zmm0, %ymm0
178; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
179; DQNOVL-NEXT:    vzeroupper
180; DQNOVL-NEXT:    retq
181  %a1 = load <4 x i64>, <4 x i64>* %a, align 8
182  %b = sitofp <4 x i64> %a1 to <4 x float>
183  ret <4 x float>%b
184}
185
; fptosi <4 x double> -> <4 x i64>: scalar vcvttsd2si without DQ; vcvttpd2qq
; with DQ.
186define <4 x i64> @f64to4sl(<4 x double> %a) {
187; NODQ-LABEL: f64to4sl:
188; NODQ:       # %bb.0:
189; NODQ-NEXT:    vextractf128 $1, %ymm0, %xmm1
190; NODQ-NEXT:    vcvttsd2si %xmm1, %rax
191; NODQ-NEXT:    vmovq %rax, %xmm2
192; NODQ-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
193; NODQ-NEXT:    vcvttsd2si %xmm1, %rax
194; NODQ-NEXT:    vmovq %rax, %xmm1
195; NODQ-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
196; NODQ-NEXT:    vcvttsd2si %xmm0, %rax
197; NODQ-NEXT:    vmovq %rax, %xmm2
198; NODQ-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
199; NODQ-NEXT:    vcvttsd2si %xmm0, %rax
200; NODQ-NEXT:    vmovq %rax, %xmm0
201; NODQ-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
202; NODQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
203; NODQ-NEXT:    retq
204;
205; VLDQ-LABEL: f64to4sl:
206; VLDQ:       # %bb.0:
207; VLDQ-NEXT:    vcvttpd2qq %ymm0, %ymm0
208; VLDQ-NEXT:    retq
209;
210; DQNOVL-LABEL: f64to4sl:
211; DQNOVL:       # %bb.0:
212; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
213; DQNOVL-NEXT:    vcvttpd2qq %zmm0, %zmm0
214; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
215; DQNOVL-NEXT:    retq
216  %b = fptosi <4 x double> %a to <4 x i64>
217  ret <4 x i64> %b
218}
219
; fptosi <4 x float> -> <4 x i64>: scalar vcvttss2si without DQ; vcvttps2qq
; with DQ.
220define <4 x i64> @f32to4sl(<4 x float> %a) {
221; NODQ-LABEL: f32to4sl:
222; NODQ:       # %bb.0:
223; NODQ-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
224; NODQ-NEXT:    vcvttss2si %xmm1, %rax
225; NODQ-NEXT:    vmovq %rax, %xmm1
226; NODQ-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
227; NODQ-NEXT:    vcvttss2si %xmm2, %rax
228; NODQ-NEXT:    vmovq %rax, %xmm2
229; NODQ-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
230; NODQ-NEXT:    vcvttss2si %xmm0, %rax
231; NODQ-NEXT:    vmovq %rax, %xmm2
232; NODQ-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
233; NODQ-NEXT:    vcvttss2si %xmm0, %rax
234; NODQ-NEXT:    vmovq %rax, %xmm0
235; NODQ-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
236; NODQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
237; NODQ-NEXT:    retq
238;
239; VLDQ-LABEL: f32to4sl:
240; VLDQ:       # %bb.0:
241; VLDQ-NEXT:    vcvttps2qq %xmm0, %ymm0
242; VLDQ-NEXT:    retq
243;
244; DQNOVL-LABEL: f32to4sl:
245; DQNOVL:       # %bb.0:
246; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
247; DQNOVL-NEXT:    vcvttps2qq %ymm0, %zmm0
248; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
249; DQNOVL-NEXT:    retq
250  %b = fptosi <4 x float> %a to <4 x i64>
251  ret <4 x i64> %b
252}
253
; sitofp <4 x i64> -> <4 x float> (narrowing): scalarized without DQ;
; vcvtqq2ps ymm->xmm with VL+DQ.
254define <4 x float> @slto4f32(<4 x i64> %a) {
255; NODQ-LABEL: slto4f32:
256; NODQ:       # %bb.0:
257; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
258; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
259; NODQ-NEXT:    vmovq %xmm0, %rax
260; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm2
261; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
262; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
263; NODQ-NEXT:    vmovq %xmm0, %rax
264; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm2
265; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
266; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
267; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm0
268; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
269; NODQ-NEXT:    vzeroupper
270; NODQ-NEXT:    retq
271;
272; VLDQ-LABEL: slto4f32:
273; VLDQ:       # %bb.0:
274; VLDQ-NEXT:    vcvtqq2ps %ymm0, %xmm0
275; VLDQ-NEXT:    vzeroupper
276; VLDQ-NEXT:    retq
277;
278; DQNOVL-LABEL: slto4f32:
279; DQNOVL:       # %bb.0:
280; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
281; DQNOVL-NEXT:    vcvtqq2ps %zmm0, %ymm0
282; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
283; DQNOVL-NEXT:    vzeroupper
284; DQNOVL-NEXT:    retq
285  %b = sitofp <4 x i64> %a to <4 x float>
286  ret <4 x float> %b
287}
288
; uitofp <4 x i64> -> <4 x float>: unsigned variant of slto4f32 —
; vcvtusi2ssq scalarization without DQ, vcvtuqq2ps with DQ.
289define <4 x float> @ulto4f32(<4 x i64> %a) {
290; NODQ-LABEL: ulto4f32:
291; NODQ:       # %bb.0:
292; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
293; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm1, %xmm1
294; NODQ-NEXT:    vmovq %xmm0, %rax
295; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm2, %xmm2
296; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
297; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
298; NODQ-NEXT:    vmovq %xmm0, %rax
299; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm3, %xmm2
300; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
301; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
302; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm3, %xmm0
303; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
304; NODQ-NEXT:    vzeroupper
305; NODQ-NEXT:    retq
306;
307; VLDQ-LABEL: ulto4f32:
308; VLDQ:       # %bb.0:
309; VLDQ-NEXT:    vcvtuqq2ps %ymm0, %xmm0
310; VLDQ-NEXT:    vzeroupper
311; VLDQ-NEXT:    retq
312;
313; DQNOVL-LABEL: ulto4f32:
314; DQNOVL:       # %bb.0:
315; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
316; DQNOVL-NEXT:    vcvtuqq2ps %zmm0, %ymm0
317; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
318; DQNOVL-NEXT:    vzeroupper
319; DQNOVL-NEXT:    retq
320  %b = uitofp <4 x i64> %a to <4 x float>
321  ret <4 x float> %b
322}
323
; uitofp <8 x i64> -> <8 x double>: vcvtusi2sdq scalarization without DQ;
; single vcvtuqq2pd with DQ.
324define <8 x double> @ulto8f64(<8 x i64> %a) {
325; NODQ-LABEL: ulto8f64:
326; NODQ:       # %bb.0:
327; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
328; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
329; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm2, %xmm2
330; NODQ-NEXT:    vmovq %xmm1, %rax
331; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm1
332; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
333; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
334; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
335; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm3
336; NODQ-NEXT:    vmovq %xmm2, %rax
337; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm2
338; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
339; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
340; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm2
341; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
342; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm3
343; NODQ-NEXT:    vmovq %xmm2, %rax
344; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm2
345; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
346; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
347; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm3
348; NODQ-NEXT:    vmovq %xmm0, %rax
349; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm0
350; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
351; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
352; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
353; NODQ-NEXT:    retq
354;
355; VLDQ-LABEL: ulto8f64:
356; VLDQ:       # %bb.0:
357; VLDQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
358; VLDQ-NEXT:    retq
359;
360; DQNOVL-LABEL: ulto8f64:
361; DQNOVL:       # %bb.0:
362; DQNOVL-NEXT:    vcvtuqq2pd %zmm0, %zmm0
363; DQNOVL-NEXT:    retq
364  %b = uitofp <8 x i64> %a to <8 x double>
365  ret <8 x double> %b
366}
367
; uitofp <16 x i64> -> <16 x double> (two zmm inputs/outputs): fully
; scalarized without DQ; two vcvtuqq2pd with DQ.
368define <16 x double> @ulto16f64(<16 x i64> %a) {
369; NODQ-LABEL: ulto16f64:
370; NODQ:       # %bb.0:
371; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
372; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
373; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm3
374; NODQ-NEXT:    vmovq %xmm2, %rax
375; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm2
376; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
377; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
378; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
379; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm4
380; NODQ-NEXT:    vmovq %xmm3, %rax
381; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
382; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
383; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
384; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm3
385; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
386; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
387; NODQ-NEXT:    vmovq %xmm3, %rax
388; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
389; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
390; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
391; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
392; NODQ-NEXT:    vmovq %xmm0, %rax
393; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm0
394; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
395; NODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
396; NODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
397; NODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm2
398; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
399; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
400; NODQ-NEXT:    vmovq %xmm2, %rax
401; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm2
402; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
403; NODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
404; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
405; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
406; NODQ-NEXT:    vmovq %xmm3, %rax
407; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
408; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
409; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
410; NODQ-NEXT:    vextracti128 $1, %ymm1, %xmm3
411; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
412; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
413; NODQ-NEXT:    vmovq %xmm3, %rax
414; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
415; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
416; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
417; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
418; NODQ-NEXT:    vmovq %xmm1, %rax
419; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm1
420; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
421; NODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
422; NODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
423; NODQ-NEXT:    retq
424;
425; VLDQ-LABEL: ulto16f64:
426; VLDQ:       # %bb.0:
427; VLDQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
428; VLDQ-NEXT:    vcvtuqq2pd %zmm1, %zmm1
429; VLDQ-NEXT:    retq
430;
431; DQNOVL-LABEL: ulto16f64:
432; DQNOVL:       # %bb.0:
433; DQNOVL-NEXT:    vcvtuqq2pd %zmm0, %zmm0
434; DQNOVL-NEXT:    vcvtuqq2pd %zmm1, %zmm1
435; DQNOVL-NEXT:    retq
436  %b = uitofp <16 x i64> %a to <16 x double>
437  ret <16 x double> %b
438}
439
; fptosi <16 x float> -> <16 x i32> via vcvttps2dq.
; NOTE(review): name says "f64to16si" but the argument is <16 x float> —
; likely a historical misnomer; confirm against upstream before renaming.
440define <16 x i32> @f64to16si(<16 x float> %a) nounwind {
441; ALL-LABEL: f64to16si:
442; ALL:       # %bb.0:
443; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
444; ALL-NEXT:    retq
445  %b = fptosi <16 x float> %a to <16 x i32>
446  ret <16 x i32> %b
447}
448
; fptosi <16 x float> -> <16 x i8>: convert to i32 then truncate with vpmovdb.
449define <16 x i8> @f32to16sc(<16 x float> %f) {
450; ALL-LABEL: f32to16sc:
451; ALL:       # %bb.0:
452; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
453; ALL-NEXT:    vpmovdb %zmm0, %xmm0
454; ALL-NEXT:    vzeroupper
455; ALL-NEXT:    retq
456  %res = fptosi <16 x float> %f to <16 x i8>
457  ret <16 x i8> %res
458}
459
; fptosi <16 x float> -> <16 x i16>: convert to i32 then truncate with vpmovdw.
460define <16 x i16> @f32to16ss(<16 x float> %f) {
461; ALL-LABEL: f32to16ss:
462; ALL:       # %bb.0:
463; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
464; ALL-NEXT:    vpmovdw %zmm0, %ymm0
465; ALL-NEXT:    retq
466  %res = fptosi <16 x float> %f to <16 x i16>
467  ret <16 x i16> %res
468}
469
; fptoui <16 x float> -> <16 x i32> via vcvttps2udq.
470define <16 x i32> @f32to16ui(<16 x float> %a) nounwind {
471; ALL-LABEL: f32to16ui:
472; ALL:       # %bb.0:
473; ALL-NEXT:    vcvttps2udq %zmm0, %zmm0
474; ALL-NEXT:    retq
475  %b = fptoui <16 x float> %a to <16 x i32>
476  ret <16 x i32> %b
477}
478
; fptoui <16 x float> -> <16 x i8>: signed vcvttps2dq suffices for the
; narrow result, then vpmovdb truncates.
479define <16 x i8> @f32to16uc(<16 x float> %f) {
480; ALL-LABEL: f32to16uc:
481; ALL:       # %bb.0:
482; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
483; ALL-NEXT:    vpmovdb %zmm0, %xmm0
484; ALL-NEXT:    vzeroupper
485; ALL-NEXT:    retq
486  %res = fptoui <16 x float> %f to <16 x i8>
487  ret <16 x i8> %res
488}
489
; fptoui <16 x float> -> <16 x i16>: vcvttps2dq + vpmovdw.
490define <16 x i16> @f32to16us(<16 x float> %f) {
491; ALL-LABEL: f32to16us:
492; ALL:       # %bb.0:
493; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
494; ALL-NEXT:    vpmovdw %zmm0, %ymm0
495; ALL-NEXT:    retq
496  %res = fptoui <16 x float> %f to <16 x i16>
497  ret <16 x i16> %res
498}
499
; fptoui <8 x float> -> <8 x i32>: without VL the op widens to zmm.
500define <8 x i32> @f32to8ui(<8 x float> %a) nounwind {
501; NOVL-LABEL: f32to8ui:
502; NOVL:       # %bb.0:
503; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
504; NOVL-NEXT:    vcvttps2udq %zmm0, %zmm0
505; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
506; NOVL-NEXT:    retq
507;
508; VL-LABEL: f32to8ui:
509; VL:       # %bb.0:
510; VL-NEXT:    vcvttps2udq %ymm0, %ymm0
511; VL-NEXT:    retq
512  %b = fptoui <8 x float> %a to <8 x i32>
513  ret <8 x i32> %b
514}
515
; fptoui <4 x float> -> <4 x i32>: xmm form needs VL; otherwise widened to zmm.
516define <4 x i32> @f32to4ui(<4 x float> %a) nounwind {
517; NOVL-LABEL: f32to4ui:
518; NOVL:       # %bb.0:
519; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
520; NOVL-NEXT:    vcvttps2udq %zmm0, %zmm0
521; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
522; NOVL-NEXT:    vzeroupper
523; NOVL-NEXT:    retq
524;
525; VL-LABEL: f32to4ui:
526; VL:       # %bb.0:
527; VL-NEXT:    vcvttps2udq %xmm0, %xmm0
528; VL-NEXT:    retq
529  %b = fptoui <4 x float> %a to <4 x i32>
530  ret <4 x i32> %b
531}
532
; fptoui <8 x double> -> <8 x i32> via vcvttpd2udq.
533define <8 x i32> @f64to8ui(<8 x double> %a) nounwind {
534; ALL-LABEL: f64to8ui:
535; ALL:       # %bb.0:
536; ALL-NEXT:    vcvttpd2udq %zmm0, %ymm0
537; ALL-NEXT:    retq
538  %b = fptoui <8 x double> %a to <8 x i32>
539  ret <8 x i32> %b
540}
541
; fptoui <8 x double> -> <8 x i16>: vcvttpd2dq then vpmovdw truncation
; (zmm source without VL, ymm source with VL).
542define <8 x i16> @f64to8us(<8 x double> %f) {
543; NOVL-LABEL: f64to8us:
544; NOVL:       # %bb.0:
545; NOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
546; NOVL-NEXT:    vpmovdw %zmm0, %ymm0
547; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
548; NOVL-NEXT:    vzeroupper
549; NOVL-NEXT:    retq
550;
551; VL-LABEL: f64to8us:
552; VL:       # %bb.0:
553; VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
554; VL-NEXT:    vpmovdw %ymm0, %xmm0
555; VL-NEXT:    vzeroupper
556; VL-NEXT:    retq
557  %res = fptoui <8 x double> %f to <8 x i16>
558  ret <8 x i16> %res
559}
560
; fptoui <8 x double> -> <8 x i8>: same lowering shape as f64to8us.
561define <8 x i8> @f64to8uc(<8 x double> %f) {
562; NOVL-LABEL: f64to8uc:
563; NOVL:       # %bb.0:
564; NOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
565; NOVL-NEXT:    vpmovdw %zmm0, %ymm0
566; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
567; NOVL-NEXT:    vzeroupper
568; NOVL-NEXT:    retq
569;
570; VL-LABEL: f64to8uc:
571; VL:       # %bb.0:
572; VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
573; VL-NEXT:    vpmovdw %ymm0, %xmm0
574; VL-NEXT:    vzeroupper
575; VL-NEXT:    retq
576  %res = fptoui <8 x double> %f to <8 x i8>
577  ret <8 x i8> %res
578}
579
; fptoui <4 x double> -> <4 x i32>: vcvttpd2udq (widened without VL).
580define <4 x i32> @f64to4ui(<4 x double> %a) nounwind {
581; NOVL-LABEL: f64to4ui:
582; NOVL:       # %bb.0:
583; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
584; NOVL-NEXT:    vcvttpd2udq %zmm0, %ymm0
585; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
586; NOVL-NEXT:    vzeroupper
587; NOVL-NEXT:    retq
588;
589; VL-LABEL: f64to4ui:
590; VL:       # %bb.0:
591; VL-NEXT:    vcvttpd2udq %ymm0, %xmm0
592; VL-NEXT:    vzeroupper
593; VL-NEXT:    retq
594  %b = fptoui <4 x double> %a to <4 x i32>
595  ret <4 x i32> %b
596}
597
; sitofp <8 x i32> -> <8 x double> via vcvtdq2pd ymm->zmm.
598define <8 x double> @sito8f64(<8 x i32> %a) {
599; ALL-LABEL: sito8f64:
600; ALL:       # %bb.0:
601; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
602; ALL-NEXT:    retq
603  %b = sitofp <8 x i32> %a to <8 x double>
604  ret <8 x double> %b
605}
; Merge-masked sitofp <8 x i32> -> <8 x double>: i8 mask moved into k1
; (kmovd with BW, kmovw otherwise) and folded into vcvtdq2pd {%k1}.
606define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
607; KNL-LABEL: i32to8f64_mask:
608; KNL:       # %bb.0:
609; KNL-NEXT:    kmovw %edi, %k1
610; KNL-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1}
611; KNL-NEXT:    retq
612;
613; VLBW-LABEL: i32to8f64_mask:
614; VLBW:       # %bb.0:
615; VLBW-NEXT:    kmovd %edi, %k1
616; VLBW-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1}
617; VLBW-NEXT:    retq
618;
619; VLNOBW-LABEL: i32to8f64_mask:
620; VLNOBW:       # %bb.0:
621; VLNOBW-NEXT:    kmovw %edi, %k1
622; VLNOBW-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1}
623; VLNOBW-NEXT:    retq
624;
625; DQNOVL-LABEL: i32to8f64_mask:
626; DQNOVL:       # %bb.0:
627; DQNOVL-NEXT:    kmovw %edi, %k1
628; DQNOVL-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1}
629; DQNOVL-NEXT:    retq
630;
631; AVX512BW-LABEL: i32to8f64_mask:
632; AVX512BW:       # %bb.0:
633; AVX512BW-NEXT:    kmovd %edi, %k1
634; AVX512BW-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1}
635; AVX512BW-NEXT:    retq
636  %1 = bitcast i8 %c to <8 x i1>
637  %2 = sitofp <8 x i32> %b to <8 x double>
638  %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
639  ret <8 x double> %3
640}
; Zero-masked sitofp <8 x i32> -> <8 x double>: select-with-zero folds into
; vcvtdq2pd {%k1} {z}.
641define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
642; KNL-LABEL: sito8f64_maskz:
643; KNL:       # %bb.0:
644; KNL-NEXT:    kmovw %edi, %k1
645; KNL-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
646; KNL-NEXT:    retq
647;
648; VLBW-LABEL: sito8f64_maskz:
649; VLBW:       # %bb.0:
650; VLBW-NEXT:    kmovd %edi, %k1
651; VLBW-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
652; VLBW-NEXT:    retq
653;
654; VLNOBW-LABEL: sito8f64_maskz:
655; VLNOBW:       # %bb.0:
656; VLNOBW-NEXT:    kmovw %edi, %k1
657; VLNOBW-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
658; VLNOBW-NEXT:    retq
659;
660; DQNOVL-LABEL: sito8f64_maskz:
661; DQNOVL:       # %bb.0:
662; DQNOVL-NEXT:    kmovw %edi, %k1
663; DQNOVL-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
664; DQNOVL-NEXT:    retq
665;
666; AVX512BW-LABEL: sito8f64_maskz:
667; AVX512BW:       # %bb.0:
668; AVX512BW-NEXT:    kmovd %edi, %k1
669; AVX512BW-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
670; AVX512BW-NEXT:    retq
671  %1 = bitcast i8 %b to <8 x i1>
672  %2 = sitofp <8 x i32> %a to <8 x double>
673  %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
674  ret <8 x double> %3
675}
676
; fptosi <8 x double> -> <8 x i32> via vcvttpd2dq.
677define <8 x i32> @f64to8si(<8 x double> %a) {
678; ALL-LABEL: f64to8si:
679; ALL:       # %bb.0:
680; ALL-NEXT:    vcvttpd2dq %zmm0, %ymm0
681; ALL-NEXT:    retq
682  %b = fptosi <8 x double> %a to <8 x i32>
683  ret <8 x i32> %b
684}
685
; fptosi <4 x double> -> <4 x i32> via vcvttpd2dq ymm->xmm.
686define <4 x i32> @f64to4si(<4 x double> %a) {
687; ALL-LABEL: f64to4si:
688; ALL:       # %bb.0:
689; ALL-NEXT:    vcvttpd2dq %ymm0, %xmm0
690; ALL-NEXT:    vzeroupper
691; ALL-NEXT:    retq
692  %b = fptosi <4 x double> %a to <4 x i32>
693  ret <4 x i32> %b
694}
695
; fptrunc <16 x double> -> <16 x float>: two vcvtpd2ps halves reassembled
; with vinsertf64x4.
696define <16 x float> @f64to16f32(<16 x double> %b) nounwind {
697; ALL-LABEL: f64to16f32:
698; ALL:       # %bb.0:
699; ALL-NEXT:    vcvtpd2ps %zmm0, %ymm0
700; ALL-NEXT:    vcvtpd2ps %zmm1, %ymm1
701; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
702; ALL-NEXT:    retq
703  %a = fptrunc <16 x double> %b to <16 x float>
704  ret <16 x float> %a
705}
706
; fptrunc <4 x double> -> <4 x float> via vcvtpd2ps.
707define <4 x float> @f64to4f32(<4 x double> %b) {
708; ALL-LABEL: f64to4f32:
709; ALL:       # %bb.0:
710; ALL-NEXT:    vcvtpd2ps %ymm0, %xmm0
711; ALL-NEXT:    vzeroupper
712; ALL-NEXT:    retq
713  %a = fptrunc <4 x double> %b to <4 x float>
714  ret <4 x float> %a
715}
716
; Zero-masked fptrunc <4 x double> -> <4 x float>: VL targets fold the mask
; into vcvtpd2ps {%k1} {z}; others materialize it via vptestmd/vpmovd2m and
; a masked vmovaps.
717define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) {
718; NOVLDQ-LABEL: f64to4f32_mask:
719; NOVLDQ:       # %bb.0:
720; NOVLDQ-NEXT:    vpslld $31, %xmm1, %xmm1
721; NOVLDQ-NEXT:    vptestmd %zmm1, %zmm1, %k1
722; NOVLDQ-NEXT:    vcvtpd2ps %ymm0, %xmm0
723; NOVLDQ-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
724; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
725; NOVLDQ-NEXT:    vzeroupper
726; NOVLDQ-NEXT:    retq
727;
728; VLDQ-LABEL: f64to4f32_mask:
729; VLDQ:       # %bb.0:
730; VLDQ-NEXT:    vpslld $31, %xmm1, %xmm1
731; VLDQ-NEXT:    vpmovd2m %xmm1, %k1
732; VLDQ-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
733; VLDQ-NEXT:    vzeroupper
734; VLDQ-NEXT:    retq
735;
736; VLNODQ-LABEL: f64to4f32_mask:
737; VLNODQ:       # %bb.0:
738; VLNODQ-NEXT:    vpslld $31, %xmm1, %xmm1
739; VLNODQ-NEXT:    vptestmd %xmm1, %xmm1, %k1
740; VLNODQ-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
741; VLNODQ-NEXT:    vzeroupper
742; VLNODQ-NEXT:    retq
743;
744; DQNOVL-LABEL: f64to4f32_mask:
745; DQNOVL:       # %bb.0:
746; DQNOVL-NEXT:    vpslld $31, %xmm1, %xmm1
747; DQNOVL-NEXT:    vpmovd2m %zmm1, %k1
748; DQNOVL-NEXT:    vcvtpd2ps %ymm0, %xmm0
749; DQNOVL-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
750; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
751; DQNOVL-NEXT:    vzeroupper
752; DQNOVL-NEXT:    retq
753  %a = fptrunc <4 x double> %b to <4 x float>
754  %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer
755  ret <4 x float> %c
756}
757
; Scalar fptrunc into lane 0 of a vector: lowers to a single vcvtsd2ss.
758define <4 x float> @f64tof32_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
759; ALL-LABEL: f64tof32_inreg:
760; ALL:       # %bb.0:
761; ALL-NEXT:    vcvtsd2ss %xmm0, %xmm1, %xmm0
762; ALL-NEXT:    retq
763  %ext = extractelement <2 x double> %a0, i32 0
764  %cvt = fptrunc double %ext to float
765  %res = insertelement <4 x float> %a1, float %cvt, i32 0
766  ret <4 x float> %res
767}
768
; fpext <8 x float> -> <8 x double> via vcvtps2pd ymm->zmm.
769define <8 x double> @f32to8f64(<8 x float> %b) nounwind {
770; ALL-LABEL: f32to8f64:
771; ALL:       # %bb.0:
772; ALL-NEXT:    vcvtps2pd %ymm0, %zmm0
773; ALL-NEXT:    retq
774  %a = fpext <8 x float> %b to <8 x double>
775  ret <8 x double> %a
776}
777
; Zero-masked fpext <4 x float> -> <4 x double>, mask from a vcmpltpd compare;
; VL folds the mask into vcvtps2pd {%k1} {z}.
778define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) {
779; NOVL-LABEL: f32to4f64_mask:
780; NOVL:       # %bb.0:
781; NOVL-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
782; NOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
783; NOVL-NEXT:    vcvtps2pd %xmm0, %ymm0
784; NOVL-NEXT:    vcmpltpd %zmm2, %zmm1, %k1
785; NOVL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
786; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
787; NOVL-NEXT:    retq
788;
789; VL-LABEL: f32to4f64_mask:
790; VL:       # %bb.0:
791; VL-NEXT:    vcmpltpd %ymm2, %ymm1, %k1
792; VL-NEXT:    vcvtps2pd %xmm0, %ymm0 {%k1} {z}
793; VL-NEXT:    retq
794  %a = fpext <4 x float> %b to <4 x double>
795  %mask = fcmp ogt <4 x double> %a1, %b1
796  %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer
797  ret <4 x double> %c
798}
799
; Scalar fpext into lane 0 of a vector: lowers to a single vcvtss2sd.
800define <2 x double> @f32tof64_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
801; ALL-LABEL: f32tof64_inreg:
802; ALL:       # %bb.0:
803; ALL-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
804; ALL-NEXT:    retq
805  %ext = extractelement <4 x float> %a1, i32 0
806  %cvt = fpext float %ext to double
807  %res = insertelement <2 x double> %a0, double %cvt, i32 0
808  ret <2 x double> %res
809}
810
; Scalar i64 -> double from memory: load folds into vcvtsi2sdq (%rdi).
811define double @sltof64_load(i64* nocapture %e) {
812; ALL-LABEL: sltof64_load:
813; ALL:       # %bb.0: # %entry
814; ALL-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0
815; ALL-NEXT:    retq
816entry:
817  %tmp1 = load i64, i64* %e, align 8
818  %conv = sitofp i64 %tmp1 to double
819  ret double %conv
820}
821
; Scalar i32 -> double from memory: load folds into vcvtsi2sdl (%rdi).
822define double @sitof64_load(i32* %e) {
823; ALL-LABEL: sitof64_load:
824; ALL:       # %bb.0: # %entry
825; ALL-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0
826; ALL-NEXT:    retq
827entry:
828  %tmp1 = load i32, i32* %e, align 4
829  %conv = sitofp i32 %tmp1 to double
830  ret double %conv
831}
832
; Scalar i32 -> float from memory: load folds into vcvtsi2ssl (%rdi).
833define float @sitof32_load(i32* %e) {
834; ALL-LABEL: sitof32_load:
835; ALL:       # %bb.0: # %entry
836; ALL-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0
837; ALL-NEXT:    retq
838entry:
839  %tmp1 = load i32, i32* %e, align 4
840  %conv = sitofp i32 %tmp1 to float
841  ret float %conv
842}
843
; Scalar i64 -> float from memory: load folds into vcvtsi2ssq (%rdi).
844define float @sltof32_load(i64* %e) {
845; ALL-LABEL: sltof32_load:
846; ALL:       # %bb.0: # %entry
847; ALL-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0
848; ALL-NEXT:    retq
849entry:
850  %tmp1 = load i64, i64* %e, align 8
851  %conv = sitofp i64 %tmp1 to float
852  ret float %conv
853}
854
; Load-fpext-store through stack slots: vmovss + vcvtss2sd + vmovsd.
855define void @f32tof64_loadstore() {
856; ALL-LABEL: f32tof64_loadstore:
857; ALL:       # %bb.0: # %entry
858; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
859; ALL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
860; ALL-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
861; ALL-NEXT:    retq
862entry:
863  %f = alloca float, align 4
864  %d = alloca double, align 8
865  %tmp = load float, float* %f, align 4
866  %conv = fpext float %tmp to double
867  store double %conv, double* %d, align 8
868  ret void
869}
870
; Load-fptrunc-store through stack slots: vmovsd + vcvtsd2ss + vmovss.
871define void @f64tof32_loadstore() nounwind uwtable {
872; ALL-LABEL: f64tof32_loadstore:
873; ALL:       # %bb.0: # %entry
874; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
875; ALL-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
876; ALL-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
877; ALL-NEXT:    retq
878entry:
879  %f = alloca float, align 4
880  %d = alloca double, align 8
881  %tmp = load double, double* %d, align 8
882  %conv = fptrunc double %tmp to float
883  store float %conv, float* %f, align 4
884  ret void
885}
886
; bitcast i64 -> double: a single GPR-to-XMM vmovq, no conversion.
887define double @long_to_double(i64 %x) {
888; ALL-LABEL: long_to_double:
889; ALL:       # %bb.0:
890; ALL-NEXT:    vmovq %rdi, %xmm0
891; ALL-NEXT:    retq
892   %res = bitcast i64 %x to double
893   ret double %res
894}
895
; bitcast double -> i64 is a plain XMM-to-GPR move (vmovq), no conversion.
define i64 @double_to_long(double %x) {
; ALL-LABEL: double_to_long:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovq %xmm0, %rax
; ALL-NEXT:    retq
   %res = bitcast double %x to i64
   ret i64 %res
}
904
; bitcast i32 -> float is a plain GPR-to-XMM move (vmovd), no conversion.
define float @int_to_float(i32 %x) {
; ALL-LABEL: int_to_float:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovd %edi, %xmm0
; ALL-NEXT:    retq
   %res = bitcast i32 %x to float
   ret float %res
}
913
; bitcast float -> i32 is a plain XMM-to-GPR move (vmovd), no conversion.
define i32 @float_to_int(float %x) {
; ALL-LABEL: float_to_int:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovd %xmm0, %eax
; ALL-NEXT:    retq
   %res = bitcast float %x to i32
   ret i32 %res
}
922
; <16 x i32> uitofp to <16 x double> splits into two 256-bit halves, each
; converted with vcvtudq2pd into a full zmm result, on all configurations.
define <16 x double> @uito16f64(<16 x i32> %a) nounwind {
; ALL-LABEL: uito16f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtudq2pd %ymm0, %zmm2
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vcvtudq2pd %ymm0, %zmm1
; ALL-NEXT:    vmovaps %zmm2, %zmm0
; ALL-NEXT:    retq
  %b = uitofp <16 x i32> %a to <16 x double>
  ret <16 x double> %b
}
934
; <8 x i64> sitofp to <8 x float>: without AVX512DQ each lane is scalarized
; through vpextrq/vmovq + vcvtsi2ssq and reassembled with vinsertps; with DQ
; (VLDQ/DQNOVL) it is a single vcvtqq2ps.
define <8 x float> @slto8f32(<8 x i64> %a) {
; NODQ-LABEL: slto8f32:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm2
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm1
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm3
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm3
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm3
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm0
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: slto8f32:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: slto8f32:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvtqq2ps %zmm0, %ymm0
; DQNOVL-NEXT:    retq
  %b = sitofp <8 x i64> %a to <8 x float>
  ret <8 x float> %b
}
978
; <16 x i64> sitofp to <16 x float>: without DQ all 16 lanes are scalarized
; (vcvtsi2ssq) and rebuilt with vinsertps/vinsertf128/vinsertf64x4; with DQ
; it is two vcvtqq2ps conversions plus one vinsertf64x4.
define <16 x float> @slto16f32(<16 x i64> %a) {
; NODQ-LABEL: slto16f32:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; NODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm3
; NODQ-NEXT:    vmovq %xmm3, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; NODQ-NEXT:    vextracti128 $1, %ymm1, %xmm1
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm1
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm3
; NODQ-NEXT:    vmovq %xmm3, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm0
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: slto16f32:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
; VLDQ-NEXT:    vcvtqq2ps %zmm1, %ymm1
; VLDQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: slto16f32:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvtqq2ps %zmm0, %ymm0
; DQNOVL-NEXT:    vcvtqq2ps %zmm1, %ymm1
; DQNOVL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; DQNOVL-NEXT:    retq
  %b = sitofp <16 x i64> %a to <16 x float>
  ret <16 x float> %b
}
1053
; <8 x i64> sitofp to <8 x double>: without DQ each lane goes through
; vpextrq/vmovq + vcvtsi2sdq and pairs are merged with vmovlhps; with DQ it
; is a single vcvtqq2pd.
define <8 x double> @slto8f64(<8 x i64> %a) {
; NODQ-LABEL: slto8f64:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm3
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm0
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: slto8f64:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: slto8f64:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvtqq2pd %zmm0, %zmm0
; DQNOVL-NEXT:    retq
  %b = sitofp <8 x i64> %a to <8 x double>
  ret <8 x double> %b
}
1097
; <16 x i64> sitofp to <16 x double>: without DQ all 16 lanes are scalarized
; via vcvtsi2sdq and reassembled; with DQ it is two register-wide vcvtqq2pd
; conversions (zmm0 and zmm1 independently).
define <16 x double> @slto16f64(<16 x i64> %a) {
; NODQ-LABEL: slto16f64:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm4
; NODQ-NEXT:    vmovq %xmm3, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm3
; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vmovq %xmm3, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm0
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; NODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; NODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm2
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vmovq %xmm3, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; NODQ-NEXT:    vextracti128 $1, %ymm1, %xmm3
; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vmovq %xmm3, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm1
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: slto16f64:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
; VLDQ-NEXT:    vcvtqq2pd %zmm1, %zmm1
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: slto16f64:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvtqq2pd %zmm0, %zmm0
; DQNOVL-NEXT:    vcvtqq2pd %zmm1, %zmm1
; DQNOVL-NEXT:    retq
  %b = sitofp <16 x i64> %a to <16 x double>
  ret <16 x double> %b
}
1169
; <8 x i64> uitofp to <8 x float>: unsigned twin of slto8f32 — scalarized
; through vcvtusi2ssq without DQ, a single vcvtuqq2ps with DQ.
define <8 x float> @ulto8f32(<8 x i64> %a) {
; NODQ-LABEL: ulto8f32:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm2, %xmm2
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm3, %xmm1
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm3, %xmm3
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm3
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm3
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm0
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: ulto8f32:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: ulto8f32:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; DQNOVL-NEXT:    retq
  %b = uitofp <8 x i64> %a to <8 x float>
  ret <8 x float> %b
}
1213
; <16 x i64> uitofp to <16 x float>: unsigned twin of slto16f32 — all 16
; lanes scalarized via vcvtusi2ssq without DQ, two vcvtuqq2ps with DQ.
define <16 x float> @ulto16f32(<16 x i64> %a) {
; NODQ-LABEL: ulto16f32:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm3, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; NODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm3
; NODQ-NEXT:    vmovq %xmm3, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; NODQ-NEXT:    vextracti128 $1, %ymm1, %xmm1
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm1
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm3
; NODQ-NEXT:    vmovq %xmm3, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm0
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: ulto16f32:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; VLDQ-NEXT:    vcvtuqq2ps %zmm1, %ymm1
; VLDQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: ulto16f32:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; DQNOVL-NEXT:    vcvtuqq2ps %zmm1, %ymm1
; DQNOVL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; DQNOVL-NEXT:    retq
  %b = uitofp <16 x i64> %a to <16 x float>
  ret <16 x float> %b
}
1288
; Merge-masked uitofp: the select of the converted value over %a becomes a
; {%k1}-masked vcvtudq2pd. Targets with AVX512BW use kmovd for the mask move;
; the others use kmovw.
define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
; KNL-LABEL: uito8f64_mask:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vcvtudq2pd %ymm1, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; VLBW-LABEL: uito8f64_mask:
; VLBW:       # %bb.0:
; VLBW-NEXT:    kmovd %edi, %k1
; VLBW-NEXT:    vcvtudq2pd %ymm1, %zmm0 {%k1}
; VLBW-NEXT:    retq
;
; VLNOBW-LABEL: uito8f64_mask:
; VLNOBW:       # %bb.0:
; VLNOBW-NEXT:    kmovw %edi, %k1
; VLNOBW-NEXT:    vcvtudq2pd %ymm1, %zmm0 {%k1}
; VLNOBW-NEXT:    retq
;
; DQNOVL-LABEL: uito8f64_mask:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    kmovw %edi, %k1
; DQNOVL-NEXT:    vcvtudq2pd %ymm1, %zmm0 {%k1}
; DQNOVL-NEXT:    retq
;
; AVX512BW-LABEL: uito8f64_mask:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vcvtudq2pd %ymm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %1 = bitcast i8 %c to <8 x i1>
  %2 = uitofp <8 x i32> %b to <8 x double>
  %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
  ret <8 x double> %3
}
; Zero-masked uitofp: select against zeroinitializer becomes the
; {%k1} {z} form of vcvtudq2pd; kmovd vs kmovw split matches uito8f64_mask.
define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
; KNL-LABEL: uito8f64_maskz:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; VLBW-LABEL: uito8f64_maskz:
; VLBW:       # %bb.0:
; VLBW-NEXT:    kmovd %edi, %k1
; VLBW-NEXT:    vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
; VLBW-NEXT:    retq
;
; VLNOBW-LABEL: uito8f64_maskz:
; VLNOBW:       # %bb.0:
; VLNOBW-NEXT:    kmovw %edi, %k1
; VLNOBW-NEXT:    vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
; VLNOBW-NEXT:    retq
;
; DQNOVL-LABEL: uito8f64_maskz:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    kmovw %edi, %k1
; DQNOVL-NEXT:    vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    retq
;
; AVX512BW-LABEL: uito8f64_maskz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
  %1 = bitcast i8 %b to <8 x i1>
  %2 = uitofp <8 x i32> %a to <8 x double>
  %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
  ret <8 x double> %3
}
1359
; <4 x i32> uitofp to <4 x double>: without VL the operation is widened to
; the 512-bit vcvtudq2pd (with register-kill annotations); with VL the
; native 256-bit form is used directly.
define <4 x double> @uito4f64(<4 x i32> %a) nounwind {
; NOVL-LABEL: uito4f64:
; NOVL:       # %bb.0:
; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; NOVL-NEXT:    vcvtudq2pd %ymm0, %zmm0
; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVL-NEXT:    retq
;
; VL-LABEL: uito4f64:
; VL:       # %bb.0:
; VL-NEXT:    vcvtudq2pd %xmm0, %ymm0
; VL-NEXT:    retq
  %b = uitofp <4 x i32> %a to <4 x double>
  ret <4 x double> %b
}
1375
; <16 x i32> uitofp to <16 x float> is a single full-width vcvtudq2ps.
define <16 x float> @uito16f32(<16 x i32> %a) nounwind {
; ALL-LABEL: uito16f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtudq2ps %zmm0, %zmm0
; ALL-NEXT:    retq
  %b = uitofp <16 x i32> %a to <16 x float>
  ret <16 x float> %b
}
1384
; <8 x i32> uitofp to <8 x double> is a single ymm->zmm vcvtudq2pd.
define <8 x double> @uito8f64(<8 x i32> %a) {
; ALL-LABEL: uito8f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtudq2pd %ymm0, %zmm0
; ALL-NEXT:    retq
  %b = uitofp <8 x i32> %a to <8 x double>
  ret <8 x double> %b
}
1393
; <8 x i32> uitofp to <8 x float>: widened to the 512-bit vcvtudq2ps when
; VL is unavailable; otherwise the native 256-bit form is emitted.
define <8 x float> @uito8f32(<8 x i32> %a) nounwind {
; NOVL-LABEL: uito8f32:
; NOVL:       # %bb.0:
; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NOVL-NEXT:    vcvtudq2ps %zmm0, %zmm0
; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVL-NEXT:    retq
;
; VL-LABEL: uito8f32:
; VL:       # %bb.0:
; VL-NEXT:    vcvtudq2ps %ymm0, %ymm0
; VL-NEXT:    retq
  %b = uitofp <8 x i32> %a to <8 x float>
  ret <8 x float> %b
}
1409
; <4 x i32> uitofp to <4 x float>: widened to zmm without VL (note the
; vzeroupper before returning to the 128-bit value); with VL it is the
; native xmm vcvtudq2ps.
define <4 x float> @uito4f32(<4 x i32> %a) nounwind {
; NOVL-LABEL: uito4f32:
; NOVL:       # %bb.0:
; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NOVL-NEXT:    vcvtudq2ps %zmm0, %zmm0
; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVL-NEXT:    vzeroupper
; NOVL-NEXT:    retq
;
; VL-LABEL: uito4f32:
; VL:       # %bb.0:
; VL-NEXT:    vcvtudq2ps %xmm0, %xmm0
; VL-NEXT:    retq
  %b = uitofp <4 x i32> %a to <4 x float>
  ret <4 x float> %b
}
1426
; Scalar fptosi f32 -> i32 lowers to the truncating vcvttss2si.
define i32 @fptosi(float %a) nounwind {
; ALL-LABEL: fptosi:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttss2si %xmm0, %eax
; ALL-NEXT:    retq
  %b = fptosi float %a to i32
  ret i32 %b
}
1435
; Scalar fptoui f32 -> i32 uses the AVX-512 unsigned vcvttss2usi.
define i32 @fptoui(float %a) nounwind {
; ALL-LABEL: fptoui:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttss2usi %xmm0, %eax
; ALL-NEXT:    retq
  %b = fptoui float %a to i32
  ret i32 %b
}
1444
; Scalar uitofp i32 -> f32 uses the AVX-512 unsigned vcvtusi2ssl.
define float @uitof32(i32 %a) nounwind {
; ALL-LABEL: uitof32:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtusi2ssl %edi, %xmm0, %xmm0
; ALL-NEXT:    retq
  %b = uitofp i32 %a to float
  ret float %b
}
1453
; Scalar uitofp i32 -> f64 uses the AVX-512 unsigned vcvtusi2sdl.
define double @uitof64(i32 %a) nounwind {
; ALL-LABEL: uitof64:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtusi2sdl %edi, %xmm0, %xmm0
; ALL-NEXT:    retq
  %b = uitofp i32 %a to double
  ret double %b
}
1462
; sitofp of a <16 x i1> compare result: without DQ the mask is materialized
; as all-ones dwords via vpcmpgtd + masked vpternlogd; with DQ the mask
; round-trips through vpmovd2m/vpmovm2d. Either way the final convert is
; vcvtdq2ps.
define <16 x float> @sbto16f32(<16 x i32> %a) {
; NODQ-LABEL: sbto16f32:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; NODQ-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
; NODQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NODQ-NEXT:    vcvtdq2ps %zmm0, %zmm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: sbto16f32:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vpmovd2m %zmm0, %k0
; VLDQ-NEXT:    vpmovm2d %k0, %zmm0
; VLDQ-NEXT:    vcvtdq2ps %zmm0, %zmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: sbto16f32:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k0
; DQNOVL-NEXT:    vpmovm2d %k0, %zmm0
; DQNOVL-NEXT:    vcvtdq2ps %zmm0, %zmm0
; DQNOVL-NEXT:    retq
  %mask = icmp slt <16 x i32> %a, zeroinitializer
  %1 = sitofp <16 x i1> %mask to <16 x float>
  ret <16 x float> %1
}
1489
; <16 x i8> sitofp to <16 x float>: sign-extend to dwords (vpmovsxbd) then
; a single vcvtdq2ps.
define <16 x float> @scto16f32(<16 x i8> %a) {
; ALL-LABEL: scto16f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxbd %xmm0, %zmm0
; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT:    retq
  %1 = sitofp <16 x i8> %a to <16 x float>
  ret <16 x float> %1
}
1499
; <16 x i16> sitofp to <16 x float>: sign-extend to dwords (vpmovsxwd) then
; vcvtdq2ps. Same pattern as swto16f32 below.
define <16 x float> @ssto16f32(<16 x i16> %a) {
; ALL-LABEL: ssto16f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxwd %ymm0, %zmm0
; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT:    retq
  %1 = sitofp <16 x i16> %a to <16 x float>
  ret <16 x float> %1
}
1509
; <8 x i16> sitofp to <8 x double>: vpmovsxwd then vcvtdq2pd.
; NOTE(review): the name says "16f64" but the test is 8-wide — likely a
; historical misnomer; renaming would churn the autogenerated CHECK lines.
define <8 x double> @ssto16f64(<8 x i16> %a) {
; ALL-LABEL: ssto16f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxwd %xmm0, %ymm0
; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
; ALL-NEXT:    retq
  %1 = sitofp <8 x i16> %a to <8 x double>
  ret <8 x double> %1
}
1519
; <8 x i8> sitofp to <8 x double>: the i8 lanes are widened with vpmovzxwd
; and then sign-extended in-register by a shift pair (vpslld $24 / vpsrad
; $24) before vcvtdq2pd.
define <8 x double> @scto8f64(<8 x i8> %a) {
; ALL-LABEL: scto8f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; ALL-NEXT:    vpslld $24, %ymm0, %ymm0
; ALL-NEXT:    vpsrad $24, %ymm0, %ymm0
; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
; ALL-NEXT:    retq
  %1 = sitofp <8 x i8> %a to <8 x double>
  ret <8 x double> %1
}
1531
; <16 x i8> sitofp to <16 x double>: one vpmovsxbd to a zmm of dwords, then
; each ymm half converted with vcvtdq2pd.
define <16 x double> @scto16f64(<16 x i8> %a) {
; ALL-LABEL: scto16f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxbd %xmm0, %zmm1
; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm0
; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm1
; ALL-NEXT:    retq
  %b = sitofp <16 x i8> %a to <16 x double>
  ret <16 x double> %b
}
1543
; sitofp of a 16-wide fcmp mask to <16 x double>: the two 8-wide vcmpltpd
; masks are combined with kunpckbw, materialized as dwords (vpternlogd
; without DQ, vpmovm2d with DQ), then each ymm half goes through vcvtdq2pd.
define <16 x double> @sbto16f64(<16 x double> %a) {
; NODQ-LABEL: sbto16f64:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; NODQ-NEXT:    vcmpltpd %zmm0, %zmm2, %k0
; NODQ-NEXT:    vcmpltpd %zmm1, %zmm2, %k1
; NODQ-NEXT:    kunpckbw %k0, %k1, %k1
; NODQ-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NODQ-NEXT:    vcvtdq2pd %ymm1, %zmm0
; NODQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; NODQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: sbto16f64:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; VLDQ-NEXT:    vcmpltpd %zmm0, %zmm2, %k0
; VLDQ-NEXT:    vcmpltpd %zmm1, %zmm2, %k1
; VLDQ-NEXT:    kunpckbw %k0, %k1, %k0
; VLDQ-NEXT:    vpmovm2d %k0, %zmm1
; VLDQ-NEXT:    vcvtdq2pd %ymm1, %zmm0
; VLDQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; VLDQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: sbto16f64:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; DQNOVL-NEXT:    vcmpltpd %zmm0, %zmm2, %k0
; DQNOVL-NEXT:    vcmpltpd %zmm1, %zmm2, %k1
; DQNOVL-NEXT:    kunpckbw %k0, %k1, %k0
; DQNOVL-NEXT:    vpmovm2d %k0, %zmm1
; DQNOVL-NEXT:    vcvtdq2pd %ymm1, %zmm0
; DQNOVL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; DQNOVL-NEXT:    vcvtdq2pd %ymm1, %zmm1
; DQNOVL-NEXT:    retq
  %cmpres = fcmp ogt <16 x double> %a, zeroinitializer
  %1 = sitofp <16 x i1> %cmpres to <16 x double>
  ret <16 x double> %1
}
1584
; sitofp of an 8-wide fcmp mask to <8 x double>: four lowerings of the
; mask-to-dwords step depending on DQ/VL availability (vpternlogd,
; vpmovm2d ymm, vpcmpeqd+masked vmovdqa32, vpmovm2d zmm), all ending in
; vcvtdq2pd.
define <8 x double> @sbto8f64(<8 x double> %a) {
; NOVLDQ-LABEL: sbto8f64:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; NOVLDQ-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
; NOVLDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    vcvtdq2pd %ymm0, %zmm0
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: sbto8f64:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; VLDQ-NEXT:    vcmpltpd %zmm0, %zmm1, %k0
; VLDQ-NEXT:    vpmovm2d %k0, %ymm0
; VLDQ-NEXT:    vcvtdq2pd %ymm0, %zmm0
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: sbto8f64:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
; VLNODQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; VLNODQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; VLNODQ-NEXT:    vcvtdq2pd %ymm0, %zmm0
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: sbto8f64:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; DQNOVL-NEXT:    vcmpltpd %zmm0, %zmm1, %k0
; DQNOVL-NEXT:    vpmovm2d %k0, %zmm0
; DQNOVL-NEXT:    vcvtdq2pd %ymm0, %zmm0
; DQNOVL-NEXT:    retq
  %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
  %1 = sitofp <8 x i1> %cmpres to <8 x double>
  ret <8 x double> %1
}
1622
; sitofp of an 8-wide f32 compare mask: the vcmpltps all-ones/zero dword
; result feeds vcvtdq2ps directly — no mask registers needed.
define <8 x float> @sbto8f32(<8 x float> %a) {
; ALL-LABEL: sbto8f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; ALL-NEXT:    vcvtdq2ps %ymm0, %ymm0
; ALL-NEXT:    retq
  %cmpres = fcmp ogt <8 x float> %a, zeroinitializer
  %1 = sitofp <8 x i1> %cmpres to <8 x float>
  ret <8 x float> %1
}
1634
; 4-wide variant of sbto8f32: vcmpltps mask converted with vcvtdq2ps.
define <4 x float> @sbto4f32(<4 x float> %a) {
; ALL-LABEL: sbto4f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
; ALL-NEXT:    vcvtdq2ps %xmm0, %xmm0
; ALL-NEXT:    retq
  %cmpres = fcmp ogt <4 x float> %a, zeroinitializer
  %1 = sitofp <4 x i1> %cmpres to <4 x float>
  ret <4 x float> %1
}
1646
; sitofp of a 4-wide f64 compare mask: the qword mask must be narrowed to
; dwords first — vpmovqd without VL, vpmovm2d with VL+DQ, or a masked
; vmovdqa32 of all-ones with VL but no DQ — then vcvtdq2pd.
define <4 x double> @sbto4f64(<4 x double> %a) {
; NOVL-LABEL: sbto4f64:
; NOVL:       # %bb.0:
; NOVL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; NOVL-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; NOVL-NEXT:    vpmovqd %zmm0, %ymm0
; NOVL-NEXT:    vcvtdq2pd %xmm0, %ymm0
; NOVL-NEXT:    retq
;
; VLDQ-LABEL: sbto4f64:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; VLDQ-NEXT:    vcmpltpd %ymm0, %ymm1, %k0
; VLDQ-NEXT:    vpmovm2d %k0, %xmm0
; VLDQ-NEXT:    vcvtdq2pd %xmm0, %ymm0
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: sbto4f64:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
; VLNODQ-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; VLNODQ-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; VLNODQ-NEXT:    vcvtdq2pd %xmm0, %ymm0
; VLNODQ-NEXT:    retq
  %cmpres = fcmp ogt <4 x double> %a, zeroinitializer
  %1 = sitofp <4 x i1> %cmpres to <4 x double>
  ret <4 x double> %1
}
1676
; 2-wide variant: same vcmpltps + vcvtdq2ps pattern as sbto4f32 (upper
; lanes are don't-care).
define <2 x float> @sbto2f32(<2 x float> %a) {
; ALL-LABEL: sbto2f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
; ALL-NEXT:    vcvtdq2ps %xmm0, %xmm0
; ALL-NEXT:    retq
  %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
  %1 = sitofp <2 x i1> %cmpres to <2 x float>
  ret <2 x float> %1
}
1688
; sitofp of a 2-wide f64 compare mask: the qword mask is compacted to
; dwords with a vpermilps shuffle before vcvtdq2pd.
define <2 x double> @sbto2f64(<2 x double> %a) {
; ALL-LABEL: sbto2f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; ALL-NEXT:    vcvtdq2pd %xmm0, %xmm0
; ALL-NEXT:    retq
  %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
  %1 = sitofp <2 x i1> %cmpres to <2 x double>
  ret <2 x double> %1
}
1701
; <16 x i8> uitofp to <16 x float>: zero-extend to dwords (vpmovzxbd) then
; a single vcvtdq2ps.
define <16 x float> @ucto16f32(<16 x i8> %a) {
; ALL-LABEL: ucto16f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT:    retq
  %b = uitofp <16 x i8> %a to <16 x float>
  ret <16 x float>%b
}
1711
; <8 x i8> uitofp to <8 x double>: mask off the high bits with a vpand
; against a constant-pool value, zero-extend to dwords, then vcvtdq2pd.
define <8 x double> @ucto8f64(<8 x i8> %a) {
; ALL-LABEL: ucto8f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
; ALL-NEXT:    retq
  %b = uitofp <8 x i8> %a to <8 x double>
  ret <8 x double> %b
}
1722
; <16 x i16> sitofp to <16 x float>: vpmovsxwd then vcvtdq2ps (same
; lowering as ssto16f32 above).
define <16 x float> @swto16f32(<16 x i16> %a) {
; ALL-LABEL: swto16f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxwd %ymm0, %zmm0
; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT:    retq
  %b = sitofp <16 x i16> %a to <16 x float>
  ret <16 x float> %b
}
1732
; <8 x i16> sitofp to <8 x double>: vpmovsxwd then vcvtdq2pd (same
; lowering as ssto16f64 above).
define <8 x double> @swto8f64(<8 x i16> %a) {
; ALL-LABEL: swto8f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxwd %xmm0, %ymm0
; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
; ALL-NEXT:    retq
  %b = sitofp <8 x i16> %a to <8 x double>
  ret <8 x double> %b
}
1742
; sitofp <16 x i16> -> <16 x double>: sign-extend to 16 dwords, then convert the low and high 8-dword halves separately (result spans two zmm registers).
define <16 x double> @swto16f64(<16 x i16> %a) {
; ALL-LABEL: swto16f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxwd %ymm0, %zmm1
; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm0
; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm1
; ALL-NEXT:    retq
  %b = sitofp <16 x i16> %a to <16 x double>
  ret <16 x double> %b
}
1754
; uitofp <16 x i8> -> <16 x double>: zero-extend bytes to 16 dwords, then convert each 8-dword half to double.
define <16 x double> @ucto16f64(<16 x i8> %a) {
; ALL-LABEL: ucto16f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm0
; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm1
; ALL-NEXT:    retq
  %b = uitofp <16 x i8> %a to <16 x double>
  ret <16 x double> %b
}
1766
; uitofp <16 x i16> -> <16 x float>: zero-extend words to dwords, then convert to float.
define <16 x float> @uwto16f32(<16 x i16> %a) {
; ALL-LABEL: uwto16f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT:    retq
  %b = uitofp <16 x i16> %a to <16 x float>
  ret <16 x float> %b
}
1776
; uitofp <8 x i16> -> <8 x double>: zero-extend words to dwords, then convert to double.
define <8 x double> @uwto8f64(<8 x i16> %a) {
; ALL-LABEL: uwto8f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
; ALL-NEXT:    retq
  %b = uitofp <8 x i16> %a to <8 x double>
  ret <8 x double> %b
}
1786
; uitofp <16 x i16> -> <16 x double>: zero-extend to 16 dwords, then convert each 8-dword half to double.
define <16 x double> @uwto16f64(<16 x i16> %a) {
; ALL-LABEL: uwto16f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm0
; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm1
; ALL-NEXT:    retq
  %b = uitofp <16 x i16> %a to <16 x double>
  ret <16 x double> %b
}
1798
; sitofp <16 x i32> -> <16 x float>: maps directly to a single vcvtdq2ps.
define <16 x float> @sito16f32(<16 x i32> %a) {
; ALL-LABEL: sito16f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT:    retq
  %b = sitofp <16 x i32> %a to <16 x float>
  ret <16 x float> %b
}
1807
; sitofp <16 x i32> -> <16 x double>: convert low and high 8-dword halves separately; result occupies two zmm registers.
define <16 x double> @sito16f64(<16 x i32> %a) {
; ALL-LABEL: sito16f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm2
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm1
; ALL-NEXT:    vmovaps %zmm2, %zmm0
; ALL-NEXT:    retq
  %b = sitofp <16 x i32> %a to <16 x double>
  ret <16 x double> %b
}
1819
; uitofp <16 x i16> -> <16 x float> (duplicate coverage of the uwto16f32 pattern): zero-extend then convert.
define <16 x float> @usto16f32(<16 x i16> %a) {
; ALL-LABEL: usto16f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT:    retq
  %b = uitofp <16 x i16> %a to <16 x float>
  ret <16 x float> %b
}
1829
; uitofp of an i1 compare result (<16 x i1> from icmp slt) to <16 x float>: materialize the mask as all-ones
; dwords, isolate bit 0 with a 31-bit logical right shift, then convert 0/1 dwords to float.
; DQ targets use vpmovd2m/vpmovm2d for the mask round-trip; non-DQ targets use vpcmpgtd + masked vpternlogd.
define <16 x float> @ubto16f32(<16 x i32> %a) {
; NODQ-LABEL: ubto16f32:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; NODQ-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
; NODQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NODQ-NEXT:    vpsrld $31, %zmm0, %zmm0
; NODQ-NEXT:    vcvtdq2ps %zmm0, %zmm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: ubto16f32:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vpmovd2m %zmm0, %k0
; VLDQ-NEXT:    vpmovm2d %k0, %zmm0
; VLDQ-NEXT:    vpsrld $31, %zmm0, %zmm0
; VLDQ-NEXT:    vcvtdq2ps %zmm0, %zmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: ubto16f32:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k0
; DQNOVL-NEXT:    vpmovm2d %k0, %zmm0
; DQNOVL-NEXT:    vpsrld $31, %zmm0, %zmm0
; DQNOVL-NEXT:    vcvtdq2ps %zmm0, %zmm0
; DQNOVL-NEXT:    retq
  %mask = icmp slt <16 x i32> %a, zeroinitializer
  %1 = uitofp <16 x i1> %mask to <16 x float>
  ret <16 x float> %1
}
1859
; Same mask-to-float idiom as ubto16f32 but widening to <16 x double>: after isolating the 0/1 dwords,
; each 8-dword half is converted to double separately (two-zmm result).
define <16 x double> @ubto16f64(<16 x i32> %a) {
; NODQ-LABEL: ubto16f64:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; NODQ-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
; NODQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NODQ-NEXT:    vpsrld $31, %zmm0, %zmm1
; NODQ-NEXT:    vcvtdq2pd %ymm1, %zmm0
; NODQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; NODQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: ubto16f64:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vpmovd2m %zmm0, %k0
; VLDQ-NEXT:    vpmovm2d %k0, %zmm0
; VLDQ-NEXT:    vpsrld $31, %zmm0, %zmm1
; VLDQ-NEXT:    vcvtdq2pd %ymm1, %zmm0
; VLDQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; VLDQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: ubto16f64:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k0
; DQNOVL-NEXT:    vpmovm2d %k0, %zmm0
; DQNOVL-NEXT:    vpsrld $31, %zmm0, %zmm1
; DQNOVL-NEXT:    vcvtdq2pd %ymm1, %zmm0
; DQNOVL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; DQNOVL-NEXT:    vcvtdq2pd %ymm1, %zmm1
; DQNOVL-NEXT:    retq
  %mask = icmp slt <16 x i32> %a, zeroinitializer
  %1 = uitofp <16 x i1> %mask to <16 x double>
  ret <16 x double> %1
}
1895
; uitofp <8 x i1> -> <8 x float> folded to a compare-and-AND: the all-ones compare result is ANDed with
; the broadcast constant 1065353216 (0x3F800000, the bit pattern of 1.0f) — no actual convert instruction.
define <8 x float> @ubto8f32(<8 x i32> %a) {
; NOVL-LABEL: ubto8f32:
; NOVL:       # %bb.0:
; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
; NOVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216]
; NOVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; NOVL-NEXT:    retq
;
; VL-LABEL: ubto8f32:
; VL:       # %bb.0:
; VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; VL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
; VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
; VL-NEXT:    retq
  %mask = icmp slt <8 x i32> %a, zeroinitializer
  %1 = uitofp <8 x i1> %mask to <8 x float>
  ret <8 x float> %1
}
1915
; uitofp <8 x i1> -> <8 x double>: compare, shift the 0/-1 dwords down to 0/1, then convert to double.
define <8 x double> @ubto8f64(<8 x i32> %a) {
; ALL-LABEL: ubto8f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
; ALL-NEXT:    vpsrld $31, %ymm0, %ymm0
; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
; ALL-NEXT:    retq
  %mask = icmp slt <8 x i32> %a, zeroinitializer
  %1 = uitofp <8 x i1> %mask to <8 x double>
  ret <8 x double> %1
}
1928
; 4-wide variant of ubto8f32: mask AND with broadcast 0x3F800000 (1.0f) replaces the convert.
define <4 x float> @ubto4f32(<4 x i32> %a) {
; NOVL-LABEL: ubto4f32:
; NOVL:       # %bb.0:
; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; NOVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1065353216,1065353216,1065353216,1065353216]
; NOVL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; NOVL-NEXT:    retq
;
; VL-LABEL: ubto4f32:
; VL:       # %bb.0:
; VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; VL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; VL-NEXT:    retq
  %mask = icmp slt <4 x i32> %a, zeroinitializer
  %1 = uitofp <4 x i1> %mask to <4 x float>
  ret <4 x float> %1
}
1948
; 4-wide variant of ubto8f64: compare, shift to 0/1, convert to double.
define <4 x double> @ubto4f64(<4 x i32> %a) {
; ALL-LABEL: ubto4f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; ALL-NEXT:    vpsrld $31, %xmm0, %xmm0
; ALL-NEXT:    vcvtdq2pd %xmm0, %ymm0
; ALL-NEXT:    retq
  %mask = icmp slt <4 x i32> %a, zeroinitializer
  %1 = uitofp <4 x i1> %mask to <4 x double>
  ret <4 x double> %1
}
1961
; uitofp of an `icmp ne` <2 x i1> mask to <2 x float>: the <2 x i32> input is legalized via blend with
; zero, compared equal-to-zero as qwords, then ANDN with a memory constant selects the float result.
define <2 x float> @ubto2f32(<2 x i32> %a) {
; ALL-LABEL: ubto2f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; ALL-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; ALL-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0
; ALL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; ALL-NEXT:    retq
  %mask = icmp ne <2 x i32> %a, zeroinitializer
  %1 = uitofp <2 x i1> %mask to <2 x float>
  ret <2 x float> %1
}
1975
; Same compare/ANDN sequence as ubto2f32, with a final vcvtdq2pd to produce <2 x double>.
define <2 x double> @ubto2f64(<2 x i32> %a) {
; ALL-LABEL: ubto2f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; ALL-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; ALL-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0
; ALL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; ALL-NEXT:    vcvtdq2pd %xmm0, %xmm0
; ALL-NEXT:    retq
  %mask = icmp ne <2 x i32> %a, zeroinitializer
  %1 = uitofp <2 x i1> %mask to <2 x double>
  ret <2 x double> %1
}
1990
; fptoui <2 x double> -> <2 x i1> used as a select mask: truncating convert, shift bit 0 into the sign
; position with vpslld $31, form a %k mask (vptestmd or vpmovd2m), then zero-masked move of the passthru.
; Non-VL targets widen to zmm (with register "kill" annotations) and need vzeroupper before returning.
define <2 x i64> @test_2f64toub(<2 x double> %a, <2 x i64> %passthru) {
; NOVLDQ-LABEL: test_2f64toub:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NOVLDQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
; NOVLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVLDQ-NEXT:    vzeroupper
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_2f64toub:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_2f64toub:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_2f64toub:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; DQNOVL-NEXT:    vcvttpd2udq %zmm0, %ymm0
; DQNOVL-NEXT:    vpslld $31, %ymm0, %ymm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %mask = fptoui <2 x double> %a to <2 x i1>
  %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
  ret <2 x i64> %select
}
2035
; 4-wide variant of test_2f64toub: here the unsigned convert is done with the signed vcvttpd2dq,
; followed by the same shift / %k-mask / zero-masked-move select sequence.
define <4 x i64> @test_4f64toub(<4 x double> %a, <4 x i64> %passthru) {
; NOVLDQ-LABEL: test_4f64toub:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NOVLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_4f64toub:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_4f64toub:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_4f64toub:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; DQNOVL-NEXT:    vcvttpd2dq %ymm0, %xmm0
; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; DQNOVL-NEXT:    retq
  %mask = fptoui <4 x double> %a to <4 x i1>
  %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
  ret <4 x i64> %select
}
2076
; 8-wide variant: full-zmm fptoui-to-i1 select mask; convert, vpslld $31, build %k1, zero-masked move.
define <8 x i64> @test_8f64toub(<8 x double> %a, <8 x i64> %passthru) {
; NOVLDQ-LABEL: test_8f64toub:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; NOVLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_8f64toub:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; VLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_8f64toub:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; VLNODQ-NEXT:    vpslld $31, %ymm0, %ymm0
; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_8f64toub:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; DQNOVL-NEXT:    vpslld $31, %ymm0, %ymm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    retq
  %mask = fptoui <8 x double> %a to <8 x i1>
  %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
  ret <8 x i64> %select
}
2113
; Float-source counterpart of test_2f64toub: vcvttps2dq, shift bit 0 into the test position, build the
; %k mask, then zero-masked select of the passthru. Non-VL targets widen to zmm and emit vzeroupper.
define <2 x i64> @test_2f32toub(<2 x float> %a, <2 x i64> %passthru) {
; NOVLDQ-LABEL: test_2f32toub:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVLDQ-NEXT:    vzeroupper
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_2f32toub:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_2f32toub:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_2f32toub:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %mask = fptoui <2 x float> %a to <2 x i1>
  %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
  ret <2 x i64> %select
}
2156
; 4-wide float version of the fptoui-to-i1 select-mask pattern; result widens to <4 x i64> in ymm/zmm.
define <4 x i64> @test_4f32toub(<4 x float> %a, <4 x i64> %passthru) {
; NOVLDQ-LABEL: test_4f32toub:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_4f32toub:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_4f32toub:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_4f32toub:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; DQNOVL-NEXT:    retq
  %mask = fptoui <4 x float> %a to <4 x i1>
  %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
  ret <4 x i64> %select
}
2197
; 8-wide float version of the fptoui-to-i1 select-mask pattern; full-zmm passthru, no register widening needed.
define <8 x i64> @test_8f32toub(<8 x float> %a, <8 x i64> %passthru) {
; NOVLDQ-LABEL: test_8f32toub:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
; NOVLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_8f32toub:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
; VLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_8f32toub:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttps2dq %ymm0, %ymm0
; VLNODQ-NEXT:    vpslld $31, %ymm0, %ymm0
; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_8f32toub:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvttps2dq %ymm0, %ymm0
; DQNOVL-NEXT:    vpslld $31, %ymm0, %ymm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    retq
  %mask = fptoui <8 x float> %a to <8 x i1>
  %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
  ret <8 x i64> %select
}
2234
; 16-wide fptoui-to-i1 select mask: everything stays in one zmm; only the mask-build instruction
; differs between prefixes (vptestmd vs vpmovd2m). Note vmovdqa32 here (i32 elements) vs vmovdqa64 above.
define <16 x i32> @test_16f32toub(<16 x float> %a, <16 x i32> %passthru) {
; NODQ-LABEL: test_16f32toub:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vcvttps2dq %zmm0, %zmm0
; NODQ-NEXT:    vpslld $31, %zmm0, %zmm0
; NODQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NODQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: test_16f32toub:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %zmm0, %zmm0
; VLDQ-NEXT:    vpslld $31, %zmm0, %zmm0
; VLDQ-NEXT:    vpmovd2m %zmm0, %k1
; VLDQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: test_16f32toub:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvttps2dq %zmm0, %zmm0
; DQNOVL-NEXT:    vpslld $31, %zmm0, %zmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    retq
  %mask = fptoui <16 x float> %a to <16 x i1>
  %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer
  ret <16 x i32> %select
}
2263
; Signed counterpart of test_2f64toub: fptosi <2 x double> -> <2 x i1> as a select mask, using the
; signed vcvttpd2dq plus the same vpslld/%k-mask/zero-masked-move sequence.
define <2 x i64> @test_2f64tosb(<2 x double> %a, <2 x i64> %passthru) {
; NOVLDQ-LABEL: test_2f64tosb:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NOVLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVLDQ-NEXT:    vzeroupper
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_2f64tosb:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_2f64tosb:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_2f64tosb:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; DQNOVL-NEXT:    vcvttpd2dq %xmm0, %xmm0
; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %mask = fptosi <2 x double> %a to <2 x i1>
  %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
  ret <2 x i64> %select
}
2306
; 4-wide fptosi-to-i1 select mask. Unlike the unsigned (toub) variants, there is no vpslld here —
; the converted value feeds the %k-mask build directly.
define <4 x i64> @test_4f64tosb(<4 x double> %a, <4 x i64> %passthru) {
; NOVLDQ-LABEL: test_4f64tosb:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NOVLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_4f64tosb:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_4f64tosb:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_4f64tosb:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; DQNOVL-NEXT:    vcvttpd2dq %ymm0, %xmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; DQNOVL-NEXT:    retq
  %mask = fptosi <4 x double> %a to <4 x i1>
  %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
  ret <4 x i64> %select
}
2343
; 8-wide fptosi-to-i1 select mask: convert, build %k1 directly (no shift), zero-masked move.
define <8 x i64> @test_8f64tosb(<8 x double> %a, <8 x i64> %passthru) {
; NOVLDQ-LABEL: test_8f64tosb:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_8f64tosb:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_8f64tosb:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_8f64tosb:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    retq
  %mask = fptosi <8 x double> %a to <8 x i1>
  %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
  ret <8 x i64> %select
}
2376
; Float-source fptosi-to-i1 select mask, 2-wide: vcvttps2dq feeds the %k-mask build directly
; (no vpslld); non-VL targets widen to zmm and emit vzeroupper.
define <2 x i64> @test_2f32tosb(<2 x float> %a, <2 x i64> %passthru) {
; NOVLDQ-LABEL: test_2f32tosb:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVLDQ-NEXT:    vzeroupper
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_2f32tosb:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_2f32tosb:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_2f32tosb:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %mask = fptosi <2 x float> %a to <2 x i1>
  %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
  ret <2 x i64> %select
}
2415
; 4-wide float fptosi-to-i1 select mask; converted value feeds the %k-mask build directly.
define <4 x i64> @test_4f32tosb(<4 x float> %a, <4 x i64> %passthru) {
; NOVLDQ-LABEL: test_4f32tosb:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_4f32tosb:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_4f32tosb:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_4f32tosb:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; DQNOVL-NEXT:    retq
  %mask = fptosi <4 x float> %a to <4 x i1>
  %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
  ret <4 x i64> %select
}
2452
; 8-wide float fptosi-to-i1 select mask: convert, build %k1, zero-masked move of the passthru.
define <8 x i64> @test_8f32tosb(<8 x float> %a, <8 x i64> %passthru) {
; NOVLDQ-LABEL: test_8f32tosb:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_8f32tosb:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_8f32tosb:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttps2dq %ymm0, %ymm0
; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_8f32tosb:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvttps2dq %ymm0, %ymm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    retq
  %mask = fptosi <8 x float> %a to <8 x i1>
  %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
  ret <8 x i64> %select
}
2485
; 16-wide fptosi-to-i1 select mask: single-zmm convert, %k1 build, masked vmovdqa32 (i32 elements).
define <16 x i32> @test_16f32tosb(<16 x float> %a, <16 x i32> %passthru) {
; NODQ-LABEL: test_16f32tosb:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vcvttps2dq %zmm0, %zmm0
; NODQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NODQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: test_16f32tosb:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %zmm0, %zmm0
; VLDQ-NEXT:    vpmovd2m %zmm0, %k1
; VLDQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: test_16f32tosb:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvttps2dq %zmm0, %zmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    retq
  %mask = fptosi <16 x float> %a to <16 x i1>
  %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer
  ret <16 x i32> %select
}
2511