; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
;
; 32-bit tests to make sure we're not doing anything stupid.
; RUN: llc < %s -mtriple=i686-unknown-unknown
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2
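; (The three i686 RUN lines above have no FileCheck pipe; they only verify
; that instruction selection completes without crashing on 32-bit targets.)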

;
; Double to Signed Integer
;
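; Note: SSE2 and AVX1 have no packed f64 -> i64 conversion, so the checks in
; this section expect each lane to go through a scalar (v)cvttsd2si and be
; repacked with movd/vmovq + punpcklqdq; for f64 -> i32 results AVX can use
; the packed vcvttpd2dqy instead (see fptosi_4f64_to_4i32 below).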

define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> %a to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    # kill
; AVX-NEXT:    vcvttpd2dqy %ymm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = fptosi <4 x double> %ext to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> %a to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttpd2dqy %ymm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> %a to <4 x i32>
  ret <4 x i32> %cvt
}

;
; Double to Unsigned Integer
;
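; Note: without a native unsigned convert, each f64 lane is expanded with the
; two-range trick that the checks below encode: values under 2^63 take the
; plain signed cvttsd2si path, while larger values are converted as x - 2^63
; and the sign bit is restored by xoring with 0x8000000000000000; a
; ucomisd/cmovae pair selects between the two results. Roughly, each lane is
; lowered as if it were the following scalar IR (an illustrative sketch only,
; not checked by FileCheck):
;
;   %sub   = fsub double %x, 0x43E0000000000000       ; x - 2^63
;   %big   = fptosi double %sub to i64
;   %bigfx = xor i64 %big, -9223372036854775808       ; restore the sign bit
;   %small = fptosi double %x to i64
;   %ge    = fcmp oge double %x, 0x43E0000000000000   ; x >= 2^63 ?
;   %res   = select i1 %ge, i64 %bigfx, i64 %small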

define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    subsd %xmm2, %xmm1
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm2, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rcx
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vcvttsd2si %xmm2, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> %a to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    subsd %xmm1, %xmm2
; SSE-NEXT:    cvttsd2si %xmm2, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm1, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rcx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vcvttsd2si %xmm2, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    subsd %xmm1, %xmm2
; SSE-NEXT:    cvttsd2si %xmm2, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm1, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovbq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm0
; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = fptoui <4 x double> %ext to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
; SSE-NEXT:    subsd %xmm3, %xmm0
; SSE-NEXT:    cvttsd2si %xmm0, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm2, %rdx
; SSE-NEXT:    ucomisd %xmm3, %xmm2
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm0
; SSE-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    subsd %xmm3, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm2, %rdx
; SSE-NEXT:    ucomisd %xmm3, %xmm2
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    subsd %xmm3, %xmm2
; SSE-NEXT:    cvttsd2si %xmm2, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rdx
; SSE-NEXT:    ucomisd %xmm3, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    movapd %xmm1, %xmm4
; SSE-NEXT:    subsd %xmm3, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    ucomisd %xmm3, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm2, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm4
; AVX-NEXT:    vcvttsd2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm2, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm4
; AVX-NEXT:    vcvttsd2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> %a to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    movapd %xmm1, %xmm3
; SSE-NEXT:    subsd %xmm2, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    movapd %xmm1, %xmm4
; SSE-NEXT:    subsd %xmm2, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm2, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    subsd %xmm2, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> %a to <4 x i32>
  ret <4 x i32> %cvt
}

;
; Float to Signed Integer
;
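; Note: f32 -> i32 has the packed (v)cvttps2dq instruction, so those cases
; should collapse to a single convert; f32 -> i64 again has no packed form
; and is expected to go lane by lane through (v)cvttss2si plus shuffles.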

define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> %a to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %cvt = fptosi <2 x float> %shuf to <2 x i64>
  ret <2 x i64> %cvt
}

define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> %a to <4 x i64>
  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuf
}

define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_8i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_8f32_to_8i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptosi <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptosi <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_8f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptosi <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}

;
; Float to Unsigned Integer
;
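; Note: for f32 -> u32 the checks expect each lane to go through the 64-bit
; form of (v)cvttss2si (every u32 value fits in a signed 64-bit result) with
; the low 32 bits repacked, while f32 -> u64 reuses the subtract/xor/cmovae
; expansion from the double cases above, with 2^63 as a float constant.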

define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> %a to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    subss %xmm2, %xmm1
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    subss %xmm2, %xmm3
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rcx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    retq
  %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %cvt = fptoui <2 x float> %shuf to <2 x i64>
  ret <2 x i64> %cvt
}

define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    subss %xmm2, %xmm1
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    subss %xmm2, %xmm3
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rcx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm2, %xmm1, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm1, %rdx
; AVX-NEXT:    vucomiss %xmm2, %xmm1
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vsubss %xmm2, %xmm0, %xmm1
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm2, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vmovq %rdx, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> %a to <4 x i64>
  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuf
}

define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    movaps %xmm1, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_8f32_to_8i32:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vcvttss2si %xmm1, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm2
; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm2
; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptoui <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    cvttss2si %xmm2, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm2, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm2, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm3, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm3
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptoui <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    cvttss2si %xmm2, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_8f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm2, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm2, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm3, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm3
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptoui <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}

;
; Constant Folding
;
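; Note: conversions of constant vectors should fold away at compile time, so
; the checks below only expect loads of the precomputed integer constants
; (negative results appear as their unsigned bit patterns, e.g. -1 as
; 18446744073709551615).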

define <2 x i64> @fptosi_2f64_to_2i64_const() {
; SSE-LABEL: fptosi_2f64_to_2i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptosi_2f64_to_2i32_const() {
; SSE-LABEL: fptosi_2f64_to_2i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i64> @fptosi_4f64_to_4i64_const() {
; SSE-LABEL: fptosi_4f64_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,18446744073709551613]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptosi_4f64_to_4i32_const() {
; SSE-LABEL: fptosi_4f64_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptoui_2f64_to_2i64_const() {
; SSE-LABEL: fptoui_2f64_to_2i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [2,4]
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = <2,4,u,u>
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = <2,4,u,u>
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [6,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [2,4,6,8]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4,6,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [2,4,6,8]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i32> @fptosi_4f32_to_4i32_const() {
; SSE-LABEL: fptosi_4f32_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptosi_4f32_to_4i64_const() {
; SSE-LABEL: fptosi_4f32_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_8i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_8f32_to_8i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
; AVX-NEXT:    retq
  %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,2,4,6]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptoui_4f32_to_4i64_const() {
; SSE-LABEL: fptoui_4f32_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [4,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,4,8]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [8,6,4,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_8f32_to_8i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
; AVX-NEXT:    retq
  %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>
  ret <8 x i32> %cvt
}