• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,VEX,AVX1
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,VEX,AVX2
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLDQ
9;
10; 32-bit tests: these only check that llc runs to completion (their output is not piped to FileCheck).
11; RUN: llc < %s -mtriple=i686-unknown-unknown
12; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse
13; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2
14
15;
16; Double to Signed Integer
17;
18
; Checks lowering of a signed <2 x double> -> <2 x i64> conversion.
; Per the assertions below, the SSE, VEX, AVX512F and AVX512VL prefixes expect
; one scalar (v)cvttsd2si per element with the two results repacked by
; (v)punpcklqdq, AVX512DQ expects one vcvttpd2qq on zmm0 followed by
; vzeroupper, and AVX512VLDQ expects a single vcvttpd2qq on xmm0.
19define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
20; SSE-LABEL: fptosi_2f64_to_2i64:
21; SSE:       # %bb.0:
22; SSE-NEXT:    cvttsd2si %xmm0, %rax
23; SSE-NEXT:    movq %rax, %xmm1
24; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
25; SSE-NEXT:    cvttsd2si %xmm0, %rax
26; SSE-NEXT:    movq %rax, %xmm0
27; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
28; SSE-NEXT:    movdqa %xmm1, %xmm0
29; SSE-NEXT:    retq
30;
31; VEX-LABEL: fptosi_2f64_to_2i64:
32; VEX:       # %bb.0:
33; VEX-NEXT:    vcvttsd2si %xmm0, %rax
34; VEX-NEXT:    vmovq %rax, %xmm1
35; VEX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
36; VEX-NEXT:    vcvttsd2si %xmm0, %rax
37; VEX-NEXT:    vmovq %rax, %xmm0
38; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
39; VEX-NEXT:    retq
40;
41; AVX512F-LABEL: fptosi_2f64_to_2i64:
42; AVX512F:       # %bb.0:
43; AVX512F-NEXT:    vcvttsd2si %xmm0, %rax
44; AVX512F-NEXT:    vmovq %rax, %xmm1
45; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
46; AVX512F-NEXT:    vcvttsd2si %xmm0, %rax
47; AVX512F-NEXT:    vmovq %rax, %xmm0
48; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
49; AVX512F-NEXT:    retq
50;
51; AVX512VL-LABEL: fptosi_2f64_to_2i64:
52; AVX512VL:       # %bb.0:
53; AVX512VL-NEXT:    vcvttsd2si %xmm0, %rax
54; AVX512VL-NEXT:    vmovq %rax, %xmm1
55; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
56; AVX512VL-NEXT:    vcvttsd2si %xmm0, %rax
57; AVX512VL-NEXT:    vmovq %rax, %xmm0
58; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
59; AVX512VL-NEXT:    retq
60;
61; AVX512DQ-LABEL: fptosi_2f64_to_2i64:
62; AVX512DQ:       # %bb.0:
63; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
64; AVX512DQ-NEXT:    vcvttpd2qq %zmm0, %zmm0
65; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
66; AVX512DQ-NEXT:    vzeroupper
67; AVX512DQ-NEXT:    retq
68;
69; AVX512VLDQ-LABEL: fptosi_2f64_to_2i64:
70; AVX512VLDQ:       # %bb.0:
71; AVX512VLDQ-NEXT:    vcvttpd2qq %xmm0, %xmm0
72; AVX512VLDQ-NEXT:    retq
73  %cvt = fptosi <2 x double> %a to <2 x i64>
74  ret <2 x i64> %cvt
75}
76
; Checks a signed <2 x double> -> <2 x i32> conversion whose result is widened
; to <4 x i32> with zeros in the upper two lanes (shuffle indices 2 and 3 pick
; from the zeroinitializer operand).
; Every checked configuration expects a single (v)cvttpd2dq and no separate
; instruction for the zero extension.
77define <4 x i32> @fptosi_2f64_to_4i32(<2 x double> %a) {
78; SSE-LABEL: fptosi_2f64_to_4i32:
79; SSE:       # %bb.0:
80; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
81; SSE-NEXT:    retq
82;
83; AVX-LABEL: fptosi_2f64_to_4i32:
84; AVX:       # %bb.0:
85; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
86; AVX-NEXT:    retq
87  %cvt = fptosi <2 x double> %a to <2 x i32>
88  %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
89  ret <4 x i32> %ext
90}
91
; Checks a signed <2 x double> -> <2 x i32> conversion returned directly as
; <2 x i32>.
; Every checked configuration expects a single (v)cvttpd2dq on xmm0.
92define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
93; SSE-LABEL: fptosi_2f64_to_2i32:
94; SSE:       # %bb.0:
95; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
96; SSE-NEXT:    retq
97;
98; AVX-LABEL: fptosi_2f64_to_2i32:
99; AVX:       # %bb.0:
100; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
101; AVX-NEXT:    retq
102  %cvt = fptosi <2 x double> %a to <2 x i32>
103  ret <2 x i32> %cvt
104}
105
; Checks a signed conversion where the <2 x double> input is first widened to
; <4 x double> with undef upper lanes and then converted to <4 x i32>.
; The SSE prefix expects one cvttpd2dq on xmm0. The AVX prefix expects the
; 256-bit form (vcvttpd2dq %ymm0, %xmm0) followed by vzeroupper.
; NOTE(review) - the name says 4f64_to_2i32 while the signature is
; <2 x double> -> <4 x i32>; presumably the name refers to the widened source
; and the two defined result lanes. Confirm against the naming of sibling tests.
106define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
107; SSE-LABEL: fptosi_4f64_to_2i32:
108; SSE:       # %bb.0:
109; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
110; SSE-NEXT:    retq
111;
112; AVX-LABEL: fptosi_4f64_to_2i32:
113; AVX:       # %bb.0:
114; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
115; AVX-NEXT:    vcvttpd2dq %ymm0, %xmm0
116; AVX-NEXT:    vzeroupper
117; AVX-NEXT:    retq
118  %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
119  %cvt = fptosi <4 x double> %ext to <4 x i32>
120  ret <4 x i32> %cvt
121}
122
; Checks lowering of a signed <4 x double> -> <4 x i64> conversion.
; Per the assertions below, the SSE, AVX1, AVX2, AVX512F and AVX512VL prefixes
; expect four scalar (v)cvttsd2si conversions. The SSE sequence works on the
; xmm0/xmm1 pair, while the AVX-family sequences split ymm0 with vextractf128
; and rejoin the halves with vinsertf128 (AVX1) or vinserti128 (the others).
; AVX512DQ expects one vcvttpd2qq on zmm0 and AVX512VLDQ one vcvttpd2qq on ymm0.
123define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
124; SSE-LABEL: fptosi_4f64_to_4i64:
125; SSE:       # %bb.0:
126; SSE-NEXT:    cvttsd2si %xmm0, %rax
127; SSE-NEXT:    movq %rax, %xmm2
128; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
129; SSE-NEXT:    cvttsd2si %xmm0, %rax
130; SSE-NEXT:    movq %rax, %xmm0
131; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
132; SSE-NEXT:    cvttsd2si %xmm1, %rax
133; SSE-NEXT:    movq %rax, %xmm3
134; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
135; SSE-NEXT:    cvttsd2si %xmm1, %rax
136; SSE-NEXT:    movq %rax, %xmm0
137; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
138; SSE-NEXT:    movdqa %xmm2, %xmm0
139; SSE-NEXT:    movdqa %xmm3, %xmm1
140; SSE-NEXT:    retq
141;
142; AVX1-LABEL: fptosi_4f64_to_4i64:
143; AVX1:       # %bb.0:
144; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
145; AVX1-NEXT:    vcvttsd2si %xmm1, %rax
146; AVX1-NEXT:    vmovq %rax, %xmm2
147; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
148; AVX1-NEXT:    vcvttsd2si %xmm1, %rax
149; AVX1-NEXT:    vmovq %rax, %xmm1
150; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
151; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
152; AVX1-NEXT:    vmovq %rax, %xmm2
153; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
154; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
155; AVX1-NEXT:    vmovq %rax, %xmm0
156; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
157; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
158; AVX1-NEXT:    retq
159;
160; AVX2-LABEL: fptosi_4f64_to_4i64:
161; AVX2:       # %bb.0:
162; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
163; AVX2-NEXT:    vcvttsd2si %xmm1, %rax
164; AVX2-NEXT:    vmovq %rax, %xmm2
165; AVX2-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
166; AVX2-NEXT:    vcvttsd2si %xmm1, %rax
167; AVX2-NEXT:    vmovq %rax, %xmm1
168; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
169; AVX2-NEXT:    vcvttsd2si %xmm0, %rax
170; AVX2-NEXT:    vmovq %rax, %xmm2
171; AVX2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
172; AVX2-NEXT:    vcvttsd2si %xmm0, %rax
173; AVX2-NEXT:    vmovq %rax, %xmm0
174; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
175; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
176; AVX2-NEXT:    retq
177;
178; AVX512F-LABEL: fptosi_4f64_to_4i64:
179; AVX512F:       # %bb.0:
180; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm1
181; AVX512F-NEXT:    vcvttsd2si %xmm1, %rax
182; AVX512F-NEXT:    vmovq %rax, %xmm2
183; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
184; AVX512F-NEXT:    vcvttsd2si %xmm1, %rax
185; AVX512F-NEXT:    vmovq %rax, %xmm1
186; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
187; AVX512F-NEXT:    vcvttsd2si %xmm0, %rax
188; AVX512F-NEXT:    vmovq %rax, %xmm2
189; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
190; AVX512F-NEXT:    vcvttsd2si %xmm0, %rax
191; AVX512F-NEXT:    vmovq %rax, %xmm0
192; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
193; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
194; AVX512F-NEXT:    retq
195;
196; AVX512VL-LABEL: fptosi_4f64_to_4i64:
197; AVX512VL:       # %bb.0:
198; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm1
199; AVX512VL-NEXT:    vcvttsd2si %xmm1, %rax
200; AVX512VL-NEXT:    vmovq %rax, %xmm2
201; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
202; AVX512VL-NEXT:    vcvttsd2si %xmm1, %rax
203; AVX512VL-NEXT:    vmovq %rax, %xmm1
204; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
205; AVX512VL-NEXT:    vcvttsd2si %xmm0, %rax
206; AVX512VL-NEXT:    vmovq %rax, %xmm2
207; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
208; AVX512VL-NEXT:    vcvttsd2si %xmm0, %rax
209; AVX512VL-NEXT:    vmovq %rax, %xmm0
210; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
211; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
212; AVX512VL-NEXT:    retq
213;
214; AVX512DQ-LABEL: fptosi_4f64_to_4i64:
215; AVX512DQ:       # %bb.0:
216; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
217; AVX512DQ-NEXT:    vcvttpd2qq %zmm0, %zmm0
218; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
219; AVX512DQ-NEXT:    retq
220;
221; AVX512VLDQ-LABEL: fptosi_4f64_to_4i64:
222; AVX512VLDQ:       # %bb.0:
223; AVX512VLDQ-NEXT:    vcvttpd2qq %ymm0, %ymm0
224; AVX512VLDQ-NEXT:    retq
225  %cvt = fptosi <4 x double> %a to <4 x i64>
226  ret <4 x i64> %cvt
227}
228
; Checks lowering of a signed <4 x double> -> <4 x i32> conversion.
; The SSE prefix expects one cvttpd2dq per 128-bit input half, merged with
; unpcklpd. The AVX prefix expects a single vcvttpd2dq from ymm0 to xmm0
; followed by vzeroupper.
229define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
230; SSE-LABEL: fptosi_4f64_to_4i32:
231; SSE:       # %bb.0:
232; SSE-NEXT:    cvttpd2dq %xmm1, %xmm1
233; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
234; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
235; SSE-NEXT:    retq
236;
237; AVX-LABEL: fptosi_4f64_to_4i32:
238; AVX:       # %bb.0:
239; AVX-NEXT:    vcvttpd2dq %ymm0, %xmm0
240; AVX-NEXT:    vzeroupper
241; AVX-NEXT:    retq
242  %cvt = fptosi <4 x double> %a to <4 x i32>
243  ret <4 x i32> %cvt
244}
245
246;
247; Double to Unsigned Integer
248;
249
; Checks lowering of an unsigned <2 x double> -> <2 x i64> conversion.
; Per the assertions below, the SSE and VEX prefixes expect, for each element,
; a signed (v)cvttsd2si of the value itself and of the value minus a constant
; loaded from memory. The second result is xor-ed with 0x8000000000000000 and
; cmovae picks between the two based on (v)ucomisd against that constant.
; AVX512F and AVX512VL expect one scalar vcvttsd2usi per element, AVX512DQ
; expects one vcvttpd2uqq on zmm0, and AVX512VLDQ one vcvttpd2uqq on xmm0.
250define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
251; SSE-LABEL: fptoui_2f64_to_2i64:
252; SSE:       # %bb.0:
253; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
254; SSE-NEXT:    movapd %xmm0, %xmm1
255; SSE-NEXT:    subsd %xmm2, %xmm1
256; SSE-NEXT:    cvttsd2si %xmm1, %rax
257; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
258; SSE-NEXT:    xorq %rcx, %rax
259; SSE-NEXT:    cvttsd2si %xmm0, %rdx
260; SSE-NEXT:    ucomisd %xmm2, %xmm0
261; SSE-NEXT:    cmovaeq %rax, %rdx
262; SSE-NEXT:    movq %rdx, %xmm1
263; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
264; SSE-NEXT:    movapd %xmm0, %xmm3
265; SSE-NEXT:    subsd %xmm2, %xmm3
266; SSE-NEXT:    cvttsd2si %xmm3, %rax
267; SSE-NEXT:    xorq %rcx, %rax
268; SSE-NEXT:    cvttsd2si %xmm0, %rcx
269; SSE-NEXT:    ucomisd %xmm2, %xmm0
270; SSE-NEXT:    cmovaeq %rax, %rcx
271; SSE-NEXT:    movq %rcx, %xmm0
272; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
273; SSE-NEXT:    movdqa %xmm1, %xmm0
274; SSE-NEXT:    retq
275;
276; VEX-LABEL: fptoui_2f64_to_2i64:
277; VEX:       # %bb.0:
278; VEX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
279; VEX-NEXT:    vsubsd %xmm1, %xmm0, %xmm2
280; VEX-NEXT:    vcvttsd2si %xmm2, %rax
281; VEX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
282; VEX-NEXT:    xorq %rcx, %rax
283; VEX-NEXT:    vcvttsd2si %xmm0, %rdx
284; VEX-NEXT:    vucomisd %xmm1, %xmm0
285; VEX-NEXT:    cmovaeq %rax, %rdx
286; VEX-NEXT:    vmovq %rdx, %xmm2
287; VEX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
288; VEX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
289; VEX-NEXT:    vcvttsd2si %xmm3, %rax
290; VEX-NEXT:    xorq %rcx, %rax
291; VEX-NEXT:    vcvttsd2si %xmm0, %rcx
292; VEX-NEXT:    vucomisd %xmm1, %xmm0
293; VEX-NEXT:    cmovaeq %rax, %rcx
294; VEX-NEXT:    vmovq %rcx, %xmm0
295; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
296; VEX-NEXT:    retq
297;
298; AVX512F-LABEL: fptoui_2f64_to_2i64:
299; AVX512F:       # %bb.0:
300; AVX512F-NEXT:    vcvttsd2usi %xmm0, %rax
301; AVX512F-NEXT:    vmovq %rax, %xmm1
302; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
303; AVX512F-NEXT:    vcvttsd2usi %xmm0, %rax
304; AVX512F-NEXT:    vmovq %rax, %xmm0
305; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
306; AVX512F-NEXT:    retq
307;
308; AVX512VL-LABEL: fptoui_2f64_to_2i64:
309; AVX512VL:       # %bb.0:
310; AVX512VL-NEXT:    vcvttsd2usi %xmm0, %rax
311; AVX512VL-NEXT:    vmovq %rax, %xmm1
312; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
313; AVX512VL-NEXT:    vcvttsd2usi %xmm0, %rax
314; AVX512VL-NEXT:    vmovq %rax, %xmm0
315; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
316; AVX512VL-NEXT:    retq
317;
318; AVX512DQ-LABEL: fptoui_2f64_to_2i64:
319; AVX512DQ:       # %bb.0:
320; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
321; AVX512DQ-NEXT:    vcvttpd2uqq %zmm0, %zmm0
322; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
323; AVX512DQ-NEXT:    vzeroupper
324; AVX512DQ-NEXT:    retq
325;
326; AVX512VLDQ-LABEL: fptoui_2f64_to_2i64:
327; AVX512VLDQ:       # %bb.0:
328; AVX512VLDQ-NEXT:    vcvttpd2uqq %xmm0, %xmm0
329; AVX512VLDQ-NEXT:    retq
330  %cvt = fptoui <2 x double> %a to <2 x i64>
331  ret <2 x i64> %cvt
332}
333
; Checks an unsigned <2 x double> -> <2 x i32> conversion whose result is
; widened to <4 x i32> with zeros in the upper two lanes (shuffle indices 2 and
; 3 pick from the zeroinitializer operand).
; Per the assertions below, the SSE prefix expects two scalar cvttsd2si
; conversions into 64-bit registers whose low halves are packed with punpckldq
; and then passed through a movq that zeroes the upper half. AVX1 and AVX2
; expect a compare against 2.147483648E+9, two vcvttpd2dq conversions (of the
; input and of the input minus that constant), a vxorpd and a vblendvps,
; followed by a zeroing vmovq. AVX512F and AVX512DQ expect vcvttpd2udq on zmm0
; plus a zeroing vmovq, while AVX512VL and AVX512VLDQ expect a single
; vcvttpd2udq on xmm0.
334define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) {
335; SSE-LABEL: fptoui_2f64_to_4i32:
336; SSE:       # %bb.0:
337; SSE-NEXT:    cvttsd2si %xmm0, %rax
338; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
339; SSE-NEXT:    cvttsd2si %xmm0, %rcx
340; SSE-NEXT:    movd %eax, %xmm0
341; SSE-NEXT:    movd %ecx, %xmm1
342; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
343; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
344; SSE-NEXT:    retq
345;
346; AVX1-LABEL: fptoui_2f64_to_4i32:
347; AVX1:       # %bb.0:
348; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
349; AVX1-NEXT:    vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
350; AVX1-NEXT:    vcmpltpd %ymm1, %ymm0, %ymm2
351; AVX1-NEXT:    vpackssdw %xmm2, %xmm2, %xmm2
352; AVX1-NEXT:    vcvttpd2dq %ymm0, %xmm3
353; AVX1-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
354; AVX1-NEXT:    vcvttpd2dq %ymm0, %xmm0
355; AVX1-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
356; AVX1-NEXT:    vblendvps %xmm2, %xmm3, %xmm0, %xmm0
357; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
358; AVX1-NEXT:    vzeroupper
359; AVX1-NEXT:    retq
360;
361; AVX2-LABEL: fptoui_2f64_to_4i32:
362; AVX2:       # %bb.0:
363; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
364; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
365; AVX2-NEXT:    vcmpltpd %ymm1, %ymm0, %ymm2
366; AVX2-NEXT:    vpackssdw %xmm2, %xmm2, %xmm2
367; AVX2-NEXT:    vsubpd %ymm1, %ymm0, %ymm1
368; AVX2-NEXT:    vcvttpd2dq %ymm1, %xmm1
369; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
370; AVX2-NEXT:    vxorpd %xmm3, %xmm1, %xmm1
371; AVX2-NEXT:    vcvttpd2dq %ymm0, %xmm0
372; AVX2-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
373; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
374; AVX2-NEXT:    vzeroupper
375; AVX2-NEXT:    retq
376;
377; AVX512F-LABEL: fptoui_2f64_to_4i32:
378; AVX512F:       # %bb.0:
379; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
380; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
381; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
382; AVX512F-NEXT:    vzeroupper
383; AVX512F-NEXT:    retq
384;
385; AVX512VL-LABEL: fptoui_2f64_to_4i32:
386; AVX512VL:       # %bb.0:
387; AVX512VL-NEXT:    vcvttpd2udq %xmm0, %xmm0
388; AVX512VL-NEXT:    retq
389;
390; AVX512DQ-LABEL: fptoui_2f64_to_4i32:
391; AVX512DQ:       # %bb.0:
392; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
393; AVX512DQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
394; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
395; AVX512DQ-NEXT:    vzeroupper
396; AVX512DQ-NEXT:    retq
397;
398; AVX512VLDQ-LABEL: fptoui_2f64_to_4i32:
399; AVX512VLDQ:       # %bb.0:
400; AVX512VLDQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
401; AVX512VLDQ-NEXT:    retq
402  %cvt = fptoui <2 x double> %a to <2 x i32>
403  %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
404  ret <4 x i32> %ext
405}
406
; Checks an unsigned <2 x double> -> <2 x i32> conversion whose result is
; widened to <4 x i32> with undef upper lanes (shuffle indices 2 and 3 are
; undef), so unlike the zero-extended 4i32 variant no zeroing move is expected.
; Per the assertions below, the SSE prefix expects two scalar cvttsd2si
; conversions packed with punpckldq. AVX1 and AVX2 expect a compare against
; 2.147483648E+9, two vcvttpd2dq conversions, a vxorpd and a vblendvps.
; AVX512F and AVX512DQ expect vcvttpd2udq on zmm0, while AVX512VL and
; AVX512VLDQ expect a single vcvttpd2udq on xmm0.
407define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
408; SSE-LABEL: fptoui_2f64_to_2i32:
409; SSE:       # %bb.0:
410; SSE-NEXT:    cvttsd2si %xmm0, %rax
411; SSE-NEXT:    movd %eax, %xmm1
412; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
413; SSE-NEXT:    cvttsd2si %xmm0, %rax
414; SSE-NEXT:    movd %eax, %xmm0
415; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
416; SSE-NEXT:    movdqa %xmm1, %xmm0
417; SSE-NEXT:    retq
418;
419; AVX1-LABEL: fptoui_2f64_to_2i32:
420; AVX1:       # %bb.0:
421; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
422; AVX1-NEXT:    vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
423; AVX1-NEXT:    vcmpltpd %ymm1, %ymm0, %ymm2
424; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
425; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
426; AVX1-NEXT:    vcvttpd2dq %ymm0, %xmm3
427; AVX1-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
428; AVX1-NEXT:    vcvttpd2dq %ymm0, %xmm0
429; AVX1-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
430; AVX1-NEXT:    vblendvps %xmm2, %xmm3, %xmm0, %xmm0
431; AVX1-NEXT:    vzeroupper
432; AVX1-NEXT:    retq
433;
434; AVX2-LABEL: fptoui_2f64_to_2i32:
435; AVX2:       # %bb.0:
436; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
437; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
438; AVX2-NEXT:    vcmpltpd %ymm1, %ymm0, %ymm2
439; AVX2-NEXT:    vextractf128 $1, %ymm2, %xmm3
440; AVX2-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
441; AVX2-NEXT:    vsubpd %ymm1, %ymm0, %ymm1
442; AVX2-NEXT:    vcvttpd2dq %ymm1, %xmm1
443; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
444; AVX2-NEXT:    vxorpd %xmm3, %xmm1, %xmm1
445; AVX2-NEXT:    vcvttpd2dq %ymm0, %xmm0
446; AVX2-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
447; AVX2-NEXT:    vzeroupper
448; AVX2-NEXT:    retq
449;
450; AVX512F-LABEL: fptoui_2f64_to_2i32:
451; AVX512F:       # %bb.0:
452; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
453; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
454; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
455; AVX512F-NEXT:    vzeroupper
456; AVX512F-NEXT:    retq
457;
458; AVX512VL-LABEL: fptoui_2f64_to_2i32:
459; AVX512VL:       # %bb.0:
460; AVX512VL-NEXT:    vcvttpd2udq %xmm0, %xmm0
461; AVX512VL-NEXT:    retq
462;
463; AVX512DQ-LABEL: fptoui_2f64_to_2i32:
464; AVX512DQ:       # %bb.0:
465; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
466; AVX512DQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
467; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
468; AVX512DQ-NEXT:    vzeroupper
469; AVX512DQ-NEXT:    retq
470;
471; AVX512VLDQ-LABEL: fptoui_2f64_to_2i32:
472; AVX512VLDQ:       # %bb.0:
473; AVX512VLDQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
474; AVX512VLDQ-NEXT:    retq
475  %cvt = fptoui <2 x double> %a to <2 x i32>
476  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
477  ret <4 x i32> %ext
478}
479
; Checks an unsigned conversion where the <2 x double> input is first widened
; to <4 x double> with zeros in the upper two lanes (shuffle indices 2 and 3
; pick from the zeroinitializer operand) and then converted to <4 x i32>.
; Per the assertions below, the SSE prefix expects two scalar cvttsd2si
; conversions packed with punpckldq and a movq that zeroes the upper half.
; All AVX-family prefixes expect the sequence to start with a register-to-self
; vmovapd/vmovaps of xmm0 (presumably this clears the upper lanes to realize
; the zero widening - TODO confirm). AVX1 and AVX2 then expect the
; compare/subtract/xor/blend sequence, and the AVX512 variants a single
; vcvttpd2udq (zmm0 source without VL, ymm0 source with VL).
; NOTE(review) - the name says 4f64_to_2i32 while the signature is
; <2 x double> -> <4 x i32>; presumably the name refers to the widened source
; and the two non-constant result lanes. Confirm the naming is intended.
480define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
481; SSE-LABEL: fptoui_4f64_to_2i32:
482; SSE:       # %bb.0:
483; SSE-NEXT:    cvttsd2si %xmm0, %rax
484; SSE-NEXT:    movd %eax, %xmm1
485; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
486; SSE-NEXT:    cvttsd2si %xmm0, %rax
487; SSE-NEXT:    movd %eax, %xmm0
488; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
489; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
490; SSE-NEXT:    retq
491;
492; AVX1-LABEL: fptoui_4f64_to_2i32:
493; AVX1:       # %bb.0:
494; AVX1-NEXT:    vmovapd %xmm0, %xmm0
495; AVX1-NEXT:    vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
496; AVX1-NEXT:    vcmpltpd %ymm1, %ymm0, %ymm2
497; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
498; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
499; AVX1-NEXT:    vsubpd %ymm1, %ymm0, %ymm1
500; AVX1-NEXT:    vcvttpd2dq %ymm1, %xmm1
501; AVX1-NEXT:    vxorpd {{.*}}(%rip), %xmm1, %xmm1
502; AVX1-NEXT:    vcvttpd2dq %ymm0, %xmm0
503; AVX1-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
504; AVX1-NEXT:    vzeroupper
505; AVX1-NEXT:    retq
506;
507; AVX2-LABEL: fptoui_4f64_to_2i32:
508; AVX2:       # %bb.0:
509; AVX2-NEXT:    vmovapd %xmm0, %xmm0
510; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
511; AVX2-NEXT:    vcmpltpd %ymm1, %ymm0, %ymm2
512; AVX2-NEXT:    vextractf128 $1, %ymm2, %xmm3
513; AVX2-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
514; AVX2-NEXT:    vsubpd %ymm1, %ymm0, %ymm1
515; AVX2-NEXT:    vcvttpd2dq %ymm1, %xmm1
516; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
517; AVX2-NEXT:    vxorpd %xmm3, %xmm1, %xmm1
518; AVX2-NEXT:    vcvttpd2dq %ymm0, %xmm0
519; AVX2-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
520; AVX2-NEXT:    vzeroupper
521; AVX2-NEXT:    retq
522;
523; AVX512F-LABEL: fptoui_4f64_to_2i32:
524; AVX512F:       # %bb.0:
525; AVX512F-NEXT:    vmovaps %xmm0, %xmm0
526; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
527; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
528; AVX512F-NEXT:    vzeroupper
529; AVX512F-NEXT:    retq
530;
531; AVX512VL-LABEL: fptoui_4f64_to_2i32:
532; AVX512VL:       # %bb.0:
533; AVX512VL-NEXT:    vmovaps %xmm0, %xmm0
534; AVX512VL-NEXT:    vcvttpd2udq %ymm0, %xmm0
535; AVX512VL-NEXT:    vzeroupper
536; AVX512VL-NEXT:    retq
537;
538; AVX512DQ-LABEL: fptoui_4f64_to_2i32:
539; AVX512DQ:       # %bb.0:
540; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
541; AVX512DQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
542; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
543; AVX512DQ-NEXT:    vzeroupper
544; AVX512DQ-NEXT:    retq
545;
546; AVX512VLDQ-LABEL: fptoui_4f64_to_2i32:
547; AVX512VLDQ:       # %bb.0:
548; AVX512VLDQ-NEXT:    vmovaps %xmm0, %xmm0
549; AVX512VLDQ-NEXT:    vcvttpd2udq %ymm0, %xmm0
550; AVX512VLDQ-NEXT:    vzeroupper
551; AVX512VLDQ-NEXT:    retq
552  %ext = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
553  %cvt = fptoui <4 x double> %ext to <4 x i32>
554  ret <4 x i32> %cvt
555}
556
; Checks lowering of an unsigned <4 x double> -> <4 x i64> conversion.
; Per the assertions below, the SSE, AVX1 and AVX2 prefixes expect, for each of
; the four elements, a signed (v)cvttsd2si of the value itself and of the value
; minus a constant loaded from memory. The second result is xor-ed with
; 0x8000000000000000 and cmovae picks between the two based on (v)ucomisd
; against that constant. AVX512F and AVX512VL expect four scalar vcvttsd2usi
; conversions, AVX512DQ expects one vcvttpd2uqq on zmm0, and AVX512VLDQ one
; vcvttpd2uqq on ymm0.
557define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
558; SSE-LABEL: fptoui_4f64_to_4i64:
559; SSE:       # %bb.0:
560; SSE-NEXT:    movapd %xmm0, %xmm2
561; SSE-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
562; SSE-NEXT:    subsd %xmm3, %xmm0
563; SSE-NEXT:    cvttsd2si %xmm0, %rcx
564; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
565; SSE-NEXT:    xorq %rax, %rcx
566; SSE-NEXT:    cvttsd2si %xmm2, %rdx
567; SSE-NEXT:    ucomisd %xmm3, %xmm2
568; SSE-NEXT:    cmovaeq %rcx, %rdx
569; SSE-NEXT:    movq %rdx, %xmm0
570; SSE-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
571; SSE-NEXT:    movapd %xmm2, %xmm4
572; SSE-NEXT:    subsd %xmm3, %xmm4
573; SSE-NEXT:    cvttsd2si %xmm4, %rcx
574; SSE-NEXT:    xorq %rax, %rcx
575; SSE-NEXT:    cvttsd2si %xmm2, %rdx
576; SSE-NEXT:    ucomisd %xmm3, %xmm2
577; SSE-NEXT:    cmovaeq %rcx, %rdx
578; SSE-NEXT:    movq %rdx, %xmm2
579; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
580; SSE-NEXT:    movapd %xmm1, %xmm2
581; SSE-NEXT:    subsd %xmm3, %xmm2
582; SSE-NEXT:    cvttsd2si %xmm2, %rcx
583; SSE-NEXT:    xorq %rax, %rcx
584; SSE-NEXT:    cvttsd2si %xmm1, %rdx
585; SSE-NEXT:    ucomisd %xmm3, %xmm1
586; SSE-NEXT:    cmovaeq %rcx, %rdx
587; SSE-NEXT:    movq %rdx, %xmm2
588; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
589; SSE-NEXT:    movapd %xmm1, %xmm4
590; SSE-NEXT:    subsd %xmm3, %xmm4
591; SSE-NEXT:    cvttsd2si %xmm4, %rcx
592; SSE-NEXT:    xorq %rax, %rcx
593; SSE-NEXT:    cvttsd2si %xmm1, %rax
594; SSE-NEXT:    ucomisd %xmm3, %xmm1
595; SSE-NEXT:    cmovaeq %rcx, %rax
596; SSE-NEXT:    movq %rax, %xmm1
597; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
598; SSE-NEXT:    movdqa %xmm2, %xmm1
599; SSE-NEXT:    retq
600;
601; AVX1-LABEL: fptoui_4f64_to_4i64:
602; AVX1:       # %bb.0:
603; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
604; AVX1-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
605; AVX1-NEXT:    vsubsd %xmm1, %xmm2, %xmm3
606; AVX1-NEXT:    vcvttsd2si %xmm3, %rax
607; AVX1-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
608; AVX1-NEXT:    xorq %rcx, %rax
609; AVX1-NEXT:    vcvttsd2si %xmm2, %rdx
610; AVX1-NEXT:    vucomisd %xmm1, %xmm2
611; AVX1-NEXT:    cmovaeq %rax, %rdx
612; AVX1-NEXT:    vmovq %rdx, %xmm3
613; AVX1-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
614; AVX1-NEXT:    vsubsd %xmm1, %xmm2, %xmm4
615; AVX1-NEXT:    vcvttsd2si %xmm4, %rax
616; AVX1-NEXT:    xorq %rcx, %rax
617; AVX1-NEXT:    vcvttsd2si %xmm2, %rdx
618; AVX1-NEXT:    vucomisd %xmm1, %xmm2
619; AVX1-NEXT:    cmovaeq %rax, %rdx
620; AVX1-NEXT:    vmovq %rdx, %xmm2
621; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
622; AVX1-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
623; AVX1-NEXT:    vcvttsd2si %xmm3, %rax
624; AVX1-NEXT:    xorq %rcx, %rax
625; AVX1-NEXT:    vcvttsd2si %xmm0, %rdx
626; AVX1-NEXT:    vucomisd %xmm1, %xmm0
627; AVX1-NEXT:    cmovaeq %rax, %rdx
628; AVX1-NEXT:    vmovq %rdx, %xmm3
629; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
630; AVX1-NEXT:    vsubsd %xmm1, %xmm0, %xmm4
631; AVX1-NEXT:    vcvttsd2si %xmm4, %rax
632; AVX1-NEXT:    xorq %rcx, %rax
633; AVX1-NEXT:    vcvttsd2si %xmm0, %rcx
634; AVX1-NEXT:    vucomisd %xmm1, %xmm0
635; AVX1-NEXT:    cmovaeq %rax, %rcx
636; AVX1-NEXT:    vmovq %rcx, %xmm0
637; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
638; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
639; AVX1-NEXT:    retq
640;
641; AVX2-LABEL: fptoui_4f64_to_4i64:
642; AVX2:       # %bb.0:
643; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm2
644; AVX2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
645; AVX2-NEXT:    vsubsd %xmm1, %xmm2, %xmm3
646; AVX2-NEXT:    vcvttsd2si %xmm3, %rax
647; AVX2-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
648; AVX2-NEXT:    xorq %rcx, %rax
649; AVX2-NEXT:    vcvttsd2si %xmm2, %rdx
650; AVX2-NEXT:    vucomisd %xmm1, %xmm2
651; AVX2-NEXT:    cmovaeq %rax, %rdx
652; AVX2-NEXT:    vmovq %rdx, %xmm3
653; AVX2-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
654; AVX2-NEXT:    vsubsd %xmm1, %xmm2, %xmm4
655; AVX2-NEXT:    vcvttsd2si %xmm4, %rax
656; AVX2-NEXT:    xorq %rcx, %rax
657; AVX2-NEXT:    vcvttsd2si %xmm2, %rdx
658; AVX2-NEXT:    vucomisd %xmm1, %xmm2
659; AVX2-NEXT:    cmovaeq %rax, %rdx
660; AVX2-NEXT:    vmovq %rdx, %xmm2
661; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
662; AVX2-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
663; AVX2-NEXT:    vcvttsd2si %xmm3, %rax
664; AVX2-NEXT:    xorq %rcx, %rax
665; AVX2-NEXT:    vcvttsd2si %xmm0, %rdx
666; AVX2-NEXT:    vucomisd %xmm1, %xmm0
667; AVX2-NEXT:    cmovaeq %rax, %rdx
668; AVX2-NEXT:    vmovq %rdx, %xmm3
669; AVX2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
670; AVX2-NEXT:    vsubsd %xmm1, %xmm0, %xmm4
671; AVX2-NEXT:    vcvttsd2si %xmm4, %rax
672; AVX2-NEXT:    xorq %rcx, %rax
673; AVX2-NEXT:    vcvttsd2si %xmm0, %rcx
674; AVX2-NEXT:    vucomisd %xmm1, %xmm0
675; AVX2-NEXT:    cmovaeq %rax, %rcx
676; AVX2-NEXT:    vmovq %rcx, %xmm0
677; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
678; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
679; AVX2-NEXT:    retq
680;
681; AVX512F-LABEL: fptoui_4f64_to_4i64:
682; AVX512F:       # %bb.0:
683; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm1
684; AVX512F-NEXT:    vcvttsd2usi %xmm1, %rax
685; AVX512F-NEXT:    vmovq %rax, %xmm2
686; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
687; AVX512F-NEXT:    vcvttsd2usi %xmm1, %rax
688; AVX512F-NEXT:    vmovq %rax, %xmm1
689; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
690; AVX512F-NEXT:    vcvttsd2usi %xmm0, %rax
691; AVX512F-NEXT:    vmovq %rax, %xmm2
692; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
693; AVX512F-NEXT:    vcvttsd2usi %xmm0, %rax
694; AVX512F-NEXT:    vmovq %rax, %xmm0
695; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
696; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
697; AVX512F-NEXT:    retq
698;
699; AVX512VL-LABEL: fptoui_4f64_to_4i64:
700; AVX512VL:       # %bb.0:
701; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm1
702; AVX512VL-NEXT:    vcvttsd2usi %xmm1, %rax
703; AVX512VL-NEXT:    vmovq %rax, %xmm2
704; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
705; AVX512VL-NEXT:    vcvttsd2usi %xmm1, %rax
706; AVX512VL-NEXT:    vmovq %rax, %xmm1
707; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
708; AVX512VL-NEXT:    vcvttsd2usi %xmm0, %rax
709; AVX512VL-NEXT:    vmovq %rax, %xmm2
710; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
711; AVX512VL-NEXT:    vcvttsd2usi %xmm0, %rax
712; AVX512VL-NEXT:    vmovq %rax, %xmm0
713; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
714; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
715; AVX512VL-NEXT:    retq
716;
717; AVX512DQ-LABEL: fptoui_4f64_to_4i64:
718; AVX512DQ:       # %bb.0:
719; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
720; AVX512DQ-NEXT:    vcvttpd2uqq %zmm0, %zmm0
721; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
722; AVX512DQ-NEXT:    retq
723;
724; AVX512VLDQ-LABEL: fptoui_4f64_to_4i64:
725; AVX512VLDQ:       # %bb.0:
726; AVX512VLDQ-NEXT:    vcvttpd2uqq %ymm0, %ymm0
727; AVX512VLDQ-NEXT:    retq
728  %cvt = fptoui <4 x double> %a to <4 x i64>
729  ret <4 x i64> %cvt
730}
731
; Checks lowering of an unsigned <4 x double> -> <4 x i32> conversion.
; Per the assertions below, the SSE prefix expects four scalar cvttsd2si
; conversions into 64-bit registers whose low halves are packed with punpckldq
; and punpcklqdq. AVX1 and AVX2 expect a compare against 2.147483648E+9, two
; vcvttpd2dq conversions (of the input and of the input minus that constant),
; a vxorpd and a vblendvps. AVX512F and AVX512DQ expect vcvttpd2udq on zmm0,
; while AVX512VL and AVX512VLDQ expect vcvttpd2udq from ymm0 to xmm0.
732define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
733; SSE-LABEL: fptoui_4f64_to_4i32:
734; SSE:       # %bb.0:
735; SSE-NEXT:    cvttsd2si %xmm1, %rax
736; SSE-NEXT:    movd %eax, %xmm2
737; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
738; SSE-NEXT:    cvttsd2si %xmm1, %rax
739; SSE-NEXT:    movd %eax, %xmm1
740; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
741; SSE-NEXT:    cvttsd2si %xmm0, %rax
742; SSE-NEXT:    movd %eax, %xmm1
743; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
744; SSE-NEXT:    cvttsd2si %xmm0, %rax
745; SSE-NEXT:    movd %eax, %xmm0
746; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
747; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
748; SSE-NEXT:    movdqa %xmm1, %xmm0
749; SSE-NEXT:    retq
750;
751; AVX1-LABEL: fptoui_4f64_to_4i32:
752; AVX1:       # %bb.0:
753; AVX1-NEXT:    vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
754; AVX1-NEXT:    vcmpltpd %ymm1, %ymm0, %ymm2
755; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
756; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
757; AVX1-NEXT:    vsubpd %ymm1, %ymm0, %ymm1
758; AVX1-NEXT:    vcvttpd2dq %ymm1, %xmm1
759; AVX1-NEXT:    vxorpd {{.*}}(%rip), %xmm1, %xmm1
760; AVX1-NEXT:    vcvttpd2dq %ymm0, %xmm0
761; AVX1-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
762; AVX1-NEXT:    vzeroupper
763; AVX1-NEXT:    retq
764;
765; AVX2-LABEL: fptoui_4f64_to_4i32:
766; AVX2:       # %bb.0:
767; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
768; AVX2-NEXT:    vcmpltpd %ymm1, %ymm0, %ymm2
769; AVX2-NEXT:    vextractf128 $1, %ymm2, %xmm3
770; AVX2-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
771; AVX2-NEXT:    vsubpd %ymm1, %ymm0, %ymm1
772; AVX2-NEXT:    vcvttpd2dq %ymm1, %xmm1
773; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
774; AVX2-NEXT:    vxorpd %xmm3, %xmm1, %xmm1
775; AVX2-NEXT:    vcvttpd2dq %ymm0, %xmm0
776; AVX2-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
777; AVX2-NEXT:    vzeroupper
778; AVX2-NEXT:    retq
779;
780; AVX512F-LABEL: fptoui_4f64_to_4i32:
781; AVX512F:       # %bb.0:
782; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
783; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
784; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
785; AVX512F-NEXT:    vzeroupper
786; AVX512F-NEXT:    retq
787;
788; AVX512VL-LABEL: fptoui_4f64_to_4i32:
789; AVX512VL:       # %bb.0:
790; AVX512VL-NEXT:    vcvttpd2udq %ymm0, %xmm0
791; AVX512VL-NEXT:    vzeroupper
792; AVX512VL-NEXT:    retq
793;
794; AVX512DQ-LABEL: fptoui_4f64_to_4i32:
795; AVX512DQ:       # %bb.0:
796; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
797; AVX512DQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
798; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
799; AVX512DQ-NEXT:    vzeroupper
800; AVX512DQ-NEXT:    retq
801;
802; AVX512VLDQ-LABEL: fptoui_4f64_to_4i32:
803; AVX512VLDQ:       # %bb.0:
804; AVX512VLDQ-NEXT:    vcvttpd2udq %ymm0, %xmm0
805; AVX512VLDQ-NEXT:    vzeroupper
806; AVX512VLDQ-NEXT:    retq
807  %cvt = fptoui <4 x double> %a to <4 x i32>
808  ret <4 x i32> %cvt
809}
810
811;
812; Float to Signed Integer
813;
814
; Signed conversion of a <2 x float> argument to <2 x i32>.
; Every checked configuration expects one packed truncating convert
; (cvttps2dq, or its v-prefixed form) on xmm0 followed by the return.
815define <2 x i32> @fptosi_2f32_to_2i32(<2 x float> %a) {
816; SSE-LABEL: fptosi_2f32_to_2i32:
817; SSE:       # %bb.0:
818; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
819; SSE-NEXT:    retq
820;
821; AVX-LABEL: fptosi_2f32_to_2i32:
822; AVX:       # %bb.0:
823; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
824; AVX-NEXT:    retq
825  %cvt = fptosi <2 x float> %a to <2 x i32>
826  ret <2 x i32> %cvt
827}
828
; Signed conversion of a full <4 x float> vector to <4 x i32>.
; The expected code is the same single packed truncating convert on xmm0
; that the two-element variant of this test expects.
829define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
830; SSE-LABEL: fptosi_4f32_to_4i32:
831; SSE:       # %bb.0:
832; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
833; SSE-NEXT:    retq
834;
835; AVX-LABEL: fptosi_4f32_to_4i32:
836; AVX:       # %bb.0:
837; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
838; AVX-NEXT:    retq
839  %cvt = fptosi <4 x float> %a to <4 x i32>
840  ret <4 x i32> %cvt
841}
842
; Signed conversion of the low two lanes of a <4 x float> to <2 x i64>.
; The shufflevector narrows the input to lanes 0 and 1 before the fptosi.
; Runs without AVX512DQ expect each lane to go through scalar cvttss2si and
; the two 64-bit results to be rejoined with punpcklqdq. The AVX512DQ run
; expects vcvttps2qq on widened registers (plus vzeroupper); the AVX512VLDQ
; run expects a single xmm-to-xmm vcvttps2qq.
843define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
844; SSE-LABEL: fptosi_2f32_to_2i64:
845; SSE:       # %bb.0:
846; SSE-NEXT:    cvttss2si %xmm0, %rax
847; SSE-NEXT:    movq %rax, %xmm1
848; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
849; SSE-NEXT:    cvttss2si %xmm0, %rax
850; SSE-NEXT:    movq %rax, %xmm0
851; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
852; SSE-NEXT:    movdqa %xmm1, %xmm0
853; SSE-NEXT:    retq
854;
855; VEX-LABEL: fptosi_2f32_to_2i64:
856; VEX:       # %bb.0:
857; VEX-NEXT:    vcvttss2si %xmm0, %rax
858; VEX-NEXT:    vmovq %rax, %xmm1
859; VEX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
860; VEX-NEXT:    vcvttss2si %xmm0, %rax
861; VEX-NEXT:    vmovq %rax, %xmm0
862; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
863; VEX-NEXT:    retq
864;
865; AVX512F-LABEL: fptosi_2f32_to_2i64:
866; AVX512F:       # %bb.0:
867; AVX512F-NEXT:    vcvttss2si %xmm0, %rax
868; AVX512F-NEXT:    vmovq %rax, %xmm1
869; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
870; AVX512F-NEXT:    vcvttss2si %xmm0, %rax
871; AVX512F-NEXT:    vmovq %rax, %xmm0
872; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
873; AVX512F-NEXT:    retq
874;
875; AVX512VL-LABEL: fptosi_2f32_to_2i64:
876; AVX512VL:       # %bb.0:
877; AVX512VL-NEXT:    vcvttss2si %xmm0, %rax
878; AVX512VL-NEXT:    vmovq %rax, %xmm1
879; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
880; AVX512VL-NEXT:    vcvttss2si %xmm0, %rax
881; AVX512VL-NEXT:    vmovq %rax, %xmm0
882; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
883; AVX512VL-NEXT:    retq
884;
885; AVX512DQ-LABEL: fptosi_2f32_to_2i64:
886; AVX512DQ:       # %bb.0:
887; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
888; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
889; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
890; AVX512DQ-NEXT:    vzeroupper
891; AVX512DQ-NEXT:    retq
892;
893; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64:
894; AVX512VLDQ:       # %bb.0:
895; AVX512VLDQ-NEXT:    vcvttps2qq %xmm0, %xmm0
896; AVX512VLDQ-NEXT:    retq
897  %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
898  %cvt = fptosi <2 x float> %shuf to <2 x i64>
899  ret <2 x i64> %cvt
900}
901
; Signed conversion where the narrowing happens after the convert: the whole
; <4 x float> is converted to <4 x i64> and the shufflevector then keeps
; result lanes 0 and 1.
; Runs without AVX512DQ expect only two scalar cvttss2si conversions (lanes 0
; and 1) joined with vpunpcklqdq/punpcklqdq. The AVX512DQ run expects
; vcvttps2qq into zmm0; the AVX512VLDQ run expects vcvttps2qq into ymm0.
; Both are followed by vzeroupper.
902define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) {
903; SSE-LABEL: fptosi_4f32_to_2i64:
904; SSE:       # %bb.0:
905; SSE-NEXT:    cvttss2si %xmm0, %rax
906; SSE-NEXT:    movq %rax, %xmm1
907; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
908; SSE-NEXT:    cvttss2si %xmm0, %rax
909; SSE-NEXT:    movq %rax, %xmm0
910; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
911; SSE-NEXT:    movdqa %xmm1, %xmm0
912; SSE-NEXT:    retq
913;
914; VEX-LABEL: fptosi_4f32_to_2i64:
915; VEX:       # %bb.0:
916; VEX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
917; VEX-NEXT:    vcvttss2si %xmm1, %rax
918; VEX-NEXT:    vcvttss2si %xmm0, %rcx
919; VEX-NEXT:    vmovq %rcx, %xmm0
920; VEX-NEXT:    vmovq %rax, %xmm1
921; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
922; VEX-NEXT:    retq
923;
924; AVX512F-LABEL: fptosi_4f32_to_2i64:
925; AVX512F:       # %bb.0:
926; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
927; AVX512F-NEXT:    vcvttss2si %xmm1, %rax
928; AVX512F-NEXT:    vcvttss2si %xmm0, %rcx
929; AVX512F-NEXT:    vmovq %rcx, %xmm0
930; AVX512F-NEXT:    vmovq %rax, %xmm1
931; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
932; AVX512F-NEXT:    retq
933;
934; AVX512VL-LABEL: fptosi_4f32_to_2i64:
935; AVX512VL:       # %bb.0:
936; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
937; AVX512VL-NEXT:    vcvttss2si %xmm1, %rax
938; AVX512VL-NEXT:    vcvttss2si %xmm0, %rcx
939; AVX512VL-NEXT:    vmovq %rcx, %xmm0
940; AVX512VL-NEXT:    vmovq %rax, %xmm1
941; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
942; AVX512VL-NEXT:    retq
943;
944; AVX512DQ-LABEL: fptosi_4f32_to_2i64:
945; AVX512DQ:       # %bb.0:
946; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
947; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
948; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
949; AVX512DQ-NEXT:    vzeroupper
950; AVX512DQ-NEXT:    retq
951;
952; AVX512VLDQ-LABEL: fptosi_4f32_to_2i64:
953; AVX512VLDQ:       # %bb.0:
954; AVX512VLDQ-NEXT:    vcvttps2qq %xmm0, %ymm0
955; AVX512VLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
956; AVX512VLDQ-NEXT:    vzeroupper
957; AVX512VLDQ-NEXT:    retq
958  %cvt = fptosi <4 x float> %a to <4 x i64>
959  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
960  ret <2 x i64> %shuf
961}
962
; Signed conversion of <8 x float> to <8 x i32>.
; The SSE run expects the vector to be handled as two 128-bit halves
; (cvttps2dq on xmm0 and on xmm1); the AVX runs expect one 256-bit
; vcvttps2dq on ymm0.
963define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
964; SSE-LABEL: fptosi_8f32_to_8i32:
965; SSE:       # %bb.0:
966; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
967; SSE-NEXT:    cvttps2dq %xmm1, %xmm1
968; SSE-NEXT:    retq
969;
970; AVX-LABEL: fptosi_8f32_to_8i32:
971; AVX:       # %bb.0:
972; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
973; AVX-NEXT:    retq
974  %cvt = fptosi <8 x float> %a to <8 x i32>
975  ret <8 x i32> %cvt
976}
977
; Signed conversion of the low four lanes of an <8 x float> to <4 x i64>.
; The shufflevector selects lanes 0-3 before the fptosi.
; Runs without AVX512DQ expect four scalar cvttss2si conversions whose
; results are paired with punpcklqdq; the AVX-family runs then merge the two
; 128-bit halves with vinsertf128 (AVX1) or vinserti128 (AVX2, AVX512F,
; AVX512VL). The AVX512DQ and AVX512VLDQ runs expect a single vcvttps2qq.
978define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
979; SSE-LABEL: fptosi_4f32_to_4i64:
980; SSE:       # %bb.0:
981; SSE-NEXT:    cvttss2si %xmm0, %rax
982; SSE-NEXT:    movq %rax, %xmm2
983; SSE-NEXT:    movaps %xmm0, %xmm1
984; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
985; SSE-NEXT:    cvttss2si %xmm1, %rax
986; SSE-NEXT:    movq %rax, %xmm1
987; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
988; SSE-NEXT:    movaps %xmm0, %xmm1
989; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
990; SSE-NEXT:    cvttss2si %xmm1, %rax
991; SSE-NEXT:    movq %rax, %xmm3
992; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
993; SSE-NEXT:    cvttss2si %xmm0, %rax
994; SSE-NEXT:    movq %rax, %xmm1
995; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
996; SSE-NEXT:    movdqa %xmm2, %xmm0
997; SSE-NEXT:    retq
998;
999; AVX1-LABEL: fptosi_4f32_to_4i64:
1000; AVX1:       # %bb.0:
1001; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1002; AVX1-NEXT:    vcvttss2si %xmm1, %rax
1003; AVX1-NEXT:    vmovq %rax, %xmm1
1004; AVX1-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1005; AVX1-NEXT:    vcvttss2si %xmm2, %rax
1006; AVX1-NEXT:    vmovq %rax, %xmm2
1007; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1008; AVX1-NEXT:    vcvttss2si %xmm0, %rax
1009; AVX1-NEXT:    vmovq %rax, %xmm2
1010; AVX1-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1011; AVX1-NEXT:    vcvttss2si %xmm0, %rax
1012; AVX1-NEXT:    vmovq %rax, %xmm0
1013; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1014; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1015; AVX1-NEXT:    retq
1016;
1017; AVX2-LABEL: fptosi_4f32_to_4i64:
1018; AVX2:       # %bb.0:
1019; AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1020; AVX2-NEXT:    vcvttss2si %xmm1, %rax
1021; AVX2-NEXT:    vmovq %rax, %xmm1
1022; AVX2-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1023; AVX2-NEXT:    vcvttss2si %xmm2, %rax
1024; AVX2-NEXT:    vmovq %rax, %xmm2
1025; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1026; AVX2-NEXT:    vcvttss2si %xmm0, %rax
1027; AVX2-NEXT:    vmovq %rax, %xmm2
1028; AVX2-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1029; AVX2-NEXT:    vcvttss2si %xmm0, %rax
1030; AVX2-NEXT:    vmovq %rax, %xmm0
1031; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1032; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1033; AVX2-NEXT:    retq
1034;
1035; AVX512F-LABEL: fptosi_4f32_to_4i64:
1036; AVX512F:       # %bb.0:
1037; AVX512F-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1038; AVX512F-NEXT:    vcvttss2si %xmm1, %rax
1039; AVX512F-NEXT:    vmovq %rax, %xmm1
1040; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1041; AVX512F-NEXT:    vcvttss2si %xmm2, %rax
1042; AVX512F-NEXT:    vmovq %rax, %xmm2
1043; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1044; AVX512F-NEXT:    vcvttss2si %xmm0, %rax
1045; AVX512F-NEXT:    vmovq %rax, %xmm2
1046; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1047; AVX512F-NEXT:    vcvttss2si %xmm0, %rax
1048; AVX512F-NEXT:    vmovq %rax, %xmm0
1049; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1050; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1051; AVX512F-NEXT:    retq
1052;
1053; AVX512VL-LABEL: fptosi_4f32_to_4i64:
1054; AVX512VL:       # %bb.0:
1055; AVX512VL-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1056; AVX512VL-NEXT:    vcvttss2si %xmm1, %rax
1057; AVX512VL-NEXT:    vmovq %rax, %xmm1
1058; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1059; AVX512VL-NEXT:    vcvttss2si %xmm2, %rax
1060; AVX512VL-NEXT:    vmovq %rax, %xmm2
1061; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1062; AVX512VL-NEXT:    vcvttss2si %xmm0, %rax
1063; AVX512VL-NEXT:    vmovq %rax, %xmm2
1064; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1065; AVX512VL-NEXT:    vcvttss2si %xmm0, %rax
1066; AVX512VL-NEXT:    vmovq %rax, %xmm0
1067; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1068; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1069; AVX512VL-NEXT:    retq
1070;
1071; AVX512DQ-LABEL: fptosi_4f32_to_4i64:
1072; AVX512DQ:       # %bb.0:
1073; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
1074; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1075; AVX512DQ-NEXT:    retq
1076;
1077; AVX512VLDQ-LABEL: fptosi_4f32_to_4i64:
1078; AVX512VLDQ:       # %bb.0:
1079; AVX512VLDQ-NEXT:    vcvttps2qq %xmm0, %ymm0
1080; AVX512VLDQ-NEXT:    retq
1081  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1082  %cvt = fptosi <4 x float> %shuf to <4 x i64>
1083  ret <4 x i64> %cvt
1084}
1085
; Signed conversion where the narrowing happens after the convert: all eight
; floats are converted to <8 x i64> and the shufflevector keeps result lanes
; 0-3.
; Runs without AVX512DQ expect only four scalar cvttss2si conversions, all
; reading lanes of xmm0, with the results paired by punpcklqdq and (on the
; AVX-family runs) merged with vinsertf128/vinserti128. Both DQ runs expect
; a single ymm-to-zmm vcvttps2qq with no vzeroupper.
1086define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
1087; SSE-LABEL: fptosi_8f32_to_4i64:
1088; SSE:       # %bb.0:
1089; SSE-NEXT:    cvttss2si %xmm0, %rax
1090; SSE-NEXT:    movq %rax, %xmm2
1091; SSE-NEXT:    movaps %xmm0, %xmm1
1092; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
1093; SSE-NEXT:    cvttss2si %xmm1, %rax
1094; SSE-NEXT:    movq %rax, %xmm1
1095; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
1096; SSE-NEXT:    movaps %xmm0, %xmm1
1097; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
1098; SSE-NEXT:    cvttss2si %xmm1, %rax
1099; SSE-NEXT:    movq %rax, %xmm3
1100; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
1101; SSE-NEXT:    cvttss2si %xmm0, %rax
1102; SSE-NEXT:    movq %rax, %xmm1
1103; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1104; SSE-NEXT:    movdqa %xmm2, %xmm0
1105; SSE-NEXT:    retq
1106;
1107; AVX1-LABEL: fptosi_8f32_to_4i64:
1108; AVX1:       # %bb.0:
1109; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1110; AVX1-NEXT:    vcvttss2si %xmm1, %rax
1111; AVX1-NEXT:    vmovq %rax, %xmm1
1112; AVX1-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1113; AVX1-NEXT:    vcvttss2si %xmm2, %rax
1114; AVX1-NEXT:    vmovq %rax, %xmm2
1115; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1116; AVX1-NEXT:    vcvttss2si %xmm0, %rax
1117; AVX1-NEXT:    vmovq %rax, %xmm2
1118; AVX1-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1119; AVX1-NEXT:    vcvttss2si %xmm0, %rax
1120; AVX1-NEXT:    vmovq %rax, %xmm0
1121; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1122; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1123; AVX1-NEXT:    retq
1124;
1125; AVX2-LABEL: fptosi_8f32_to_4i64:
1126; AVX2:       # %bb.0:
1127; AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1128; AVX2-NEXT:    vcvttss2si %xmm1, %rax
1129; AVX2-NEXT:    vmovq %rax, %xmm1
1130; AVX2-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1131; AVX2-NEXT:    vcvttss2si %xmm2, %rax
1132; AVX2-NEXT:    vmovq %rax, %xmm2
1133; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1134; AVX2-NEXT:    vcvttss2si %xmm0, %rax
1135; AVX2-NEXT:    vmovq %rax, %xmm2
1136; AVX2-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1137; AVX2-NEXT:    vcvttss2si %xmm0, %rax
1138; AVX2-NEXT:    vmovq %rax, %xmm0
1139; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1140; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1141; AVX2-NEXT:    retq
1142;
1143; AVX512F-LABEL: fptosi_8f32_to_4i64:
1144; AVX512F:       # %bb.0:
1145; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1146; AVX512F-NEXT:    vcvttss2si %xmm1, %rax
1147; AVX512F-NEXT:    vcvttss2si %xmm0, %rcx
1148; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1149; AVX512F-NEXT:    vcvttss2si %xmm1, %rdx
1150; AVX512F-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1151; AVX512F-NEXT:    vcvttss2si %xmm0, %rsi
1152; AVX512F-NEXT:    vmovq %rsi, %xmm0
1153; AVX512F-NEXT:    vmovq %rdx, %xmm1
1154; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1155; AVX512F-NEXT:    vmovq %rcx, %xmm1
1156; AVX512F-NEXT:    vmovq %rax, %xmm2
1157; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1158; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
1159; AVX512F-NEXT:    retq
1160;
1161; AVX512VL-LABEL: fptosi_8f32_to_4i64:
1162; AVX512VL:       # %bb.0:
1163; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1164; AVX512VL-NEXT:    vcvttss2si %xmm1, %rax
1165; AVX512VL-NEXT:    vcvttss2si %xmm0, %rcx
1166; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1167; AVX512VL-NEXT:    vcvttss2si %xmm1, %rdx
1168; AVX512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1169; AVX512VL-NEXT:    vcvttss2si %xmm0, %rsi
1170; AVX512VL-NEXT:    vmovq %rsi, %xmm0
1171; AVX512VL-NEXT:    vmovq %rdx, %xmm1
1172; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1173; AVX512VL-NEXT:    vmovq %rcx, %xmm1
1174; AVX512VL-NEXT:    vmovq %rax, %xmm2
1175; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1176; AVX512VL-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
1177; AVX512VL-NEXT:    retq
1178;
1179; AVX512DQ-LABEL: fptosi_8f32_to_4i64:
1180; AVX512DQ:       # %bb.0:
1181; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
1182; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1183; AVX512DQ-NEXT:    retq
1184;
1185; AVX512VLDQ-LABEL: fptosi_8f32_to_4i64:
1186; AVX512VLDQ:       # %bb.0:
1187; AVX512VLDQ-NEXT:    vcvttps2qq %ymm0, %zmm0
1188; AVX512VLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1189; AVX512VLDQ-NEXT:    retq
1190  %cvt = fptosi <8 x float> %a to <8 x i64>
1191  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1192  ret <4 x i64> %shuf
1193}
1194
1195;
1196; Float to Unsigned Integer
1197;
1198
; Unsigned conversion of a <2 x float> argument to <2 x i32>.
; The SSE, AVX1 and AVX2 runs expect an emulation built on the signed
; convert: the input is compared (less-than) with a splat of 2.14748365E+9,
; both the input and the input minus that splat go through cvttps2dq, the
; second result is xor'ed with another constant (a 2147483648 splat in the
; AVX2 pattern; a RIP-relative memory operand in the SSE and AVX1 patterns),
; and the compare mask selects between the two results.
; The AVX512 runs expect vcvttps2udq, on zmm0 plus vzeroupper without VL and
; directly on xmm0 with VL.
1199define <2 x i32> @fptoui_2f32_to_2i32(<2 x float> %a) {
1200; SSE-LABEL: fptoui_2f32_to_2i32:
1201; SSE:       # %bb.0:
1202; SSE-NEXT:    movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1203; SSE-NEXT:    movaps %xmm0, %xmm1
1204; SSE-NEXT:    cmpltps %xmm2, %xmm1
1205; SSE-NEXT:    cvttps2dq %xmm0, %xmm3
1206; SSE-NEXT:    subps %xmm2, %xmm0
1207; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
1208; SSE-NEXT:    xorps {{.*}}(%rip), %xmm0
1209; SSE-NEXT:    andps %xmm1, %xmm3
1210; SSE-NEXT:    andnps %xmm0, %xmm1
1211; SSE-NEXT:    orps %xmm3, %xmm1
1212; SSE-NEXT:    movaps %xmm1, %xmm0
1213; SSE-NEXT:    retq
1214;
1215; AVX1-LABEL: fptoui_2f32_to_2i32:
1216; AVX1:       # %bb.0:
1217; AVX1-NEXT:    vmovaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1218; AVX1-NEXT:    vcmpltps %xmm1, %xmm0, %xmm2
1219; AVX1-NEXT:    vsubps %xmm1, %xmm0, %xmm1
1220; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
1221; AVX1-NEXT:    vxorps {{.*}}(%rip), %xmm1, %xmm1
1222; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
1223; AVX1-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1224; AVX1-NEXT:    retq
1225;
1226; AVX2-LABEL: fptoui_2f32_to_2i32:
1227; AVX2:       # %bb.0:
1228; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1229; AVX2-NEXT:    vcmpltps %xmm1, %xmm0, %xmm2
1230; AVX2-NEXT:    vsubps %xmm1, %xmm0, %xmm1
1231; AVX2-NEXT:    vcvttps2dq %xmm1, %xmm1
1232; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
1233; AVX2-NEXT:    vxorps %xmm3, %xmm1, %xmm1
1234; AVX2-NEXT:    vcvttps2dq %xmm0, %xmm0
1235; AVX2-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1236; AVX2-NEXT:    retq
1237;
1238; AVX512F-LABEL: fptoui_2f32_to_2i32:
1239; AVX512F:       # %bb.0:
1240; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1241; AVX512F-NEXT:    vcvttps2udq %zmm0, %zmm0
1242; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1243; AVX512F-NEXT:    vzeroupper
1244; AVX512F-NEXT:    retq
1245;
1246; AVX512VL-LABEL: fptoui_2f32_to_2i32:
1247; AVX512VL:       # %bb.0:
1248; AVX512VL-NEXT:    vcvttps2udq %xmm0, %xmm0
1249; AVX512VL-NEXT:    retq
1250;
1251; AVX512DQ-LABEL: fptoui_2f32_to_2i32:
1252; AVX512DQ:       # %bb.0:
1253; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1254; AVX512DQ-NEXT:    vcvttps2udq %zmm0, %zmm0
1255; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1256; AVX512DQ-NEXT:    vzeroupper
1257; AVX512DQ-NEXT:    retq
1258;
1259; AVX512VLDQ-LABEL: fptoui_2f32_to_2i32:
1260; AVX512VLDQ:       # %bb.0:
1261; AVX512VLDQ-NEXT:    vcvttps2udq %xmm0, %xmm0
1262; AVX512VLDQ-NEXT:    retq
1263  %cvt = fptoui <2 x float> %a to <2 x i32>
1264  ret <2 x i32> %cvt
1265}
1266
; Unsigned conversion of a full <4 x float> vector to <4 x i32>.
; The SSE, AVX1 and AVX2 runs expect the compare / subtract / signed-convert
; / xor / select sequence built around a 2.14748365E+9 splat; the select is
; and+andn+or in the SSE pattern and vblendvps in the AVX1/AVX2 patterns.
; The AVX512 runs expect vcvttps2udq, widened to zmm0 (with vzeroupper) when
; VL is not enabled and operating directly on xmm0 when it is.
1267define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
1268; SSE-LABEL: fptoui_4f32_to_4i32:
1269; SSE:       # %bb.0:
1270; SSE-NEXT:    movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1271; SSE-NEXT:    movaps %xmm0, %xmm1
1272; SSE-NEXT:    cmpltps %xmm2, %xmm1
1273; SSE-NEXT:    cvttps2dq %xmm0, %xmm3
1274; SSE-NEXT:    subps %xmm2, %xmm0
1275; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
1276; SSE-NEXT:    xorps {{.*}}(%rip), %xmm0
1277; SSE-NEXT:    andps %xmm1, %xmm3
1278; SSE-NEXT:    andnps %xmm0, %xmm1
1279; SSE-NEXT:    orps %xmm3, %xmm1
1280; SSE-NEXT:    movaps %xmm1, %xmm0
1281; SSE-NEXT:    retq
1282;
1283; AVX1-LABEL: fptoui_4f32_to_4i32:
1284; AVX1:       # %bb.0:
1285; AVX1-NEXT:    vmovaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1286; AVX1-NEXT:    vcmpltps %xmm1, %xmm0, %xmm2
1287; AVX1-NEXT:    vsubps %xmm1, %xmm0, %xmm1
1288; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
1289; AVX1-NEXT:    vxorps {{.*}}(%rip), %xmm1, %xmm1
1290; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
1291; AVX1-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1292; AVX1-NEXT:    retq
1293;
1294; AVX2-LABEL: fptoui_4f32_to_4i32:
1295; AVX2:       # %bb.0:
1296; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1297; AVX2-NEXT:    vcmpltps %xmm1, %xmm0, %xmm2
1298; AVX2-NEXT:    vsubps %xmm1, %xmm0, %xmm1
1299; AVX2-NEXT:    vcvttps2dq %xmm1, %xmm1
1300; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
1301; AVX2-NEXT:    vxorps %xmm3, %xmm1, %xmm1
1302; AVX2-NEXT:    vcvttps2dq %xmm0, %xmm0
1303; AVX2-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
1304; AVX2-NEXT:    retq
1305;
1306; AVX512F-LABEL: fptoui_4f32_to_4i32:
1307; AVX512F:       # %bb.0:
1308; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1309; AVX512F-NEXT:    vcvttps2udq %zmm0, %zmm0
1310; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1311; AVX512F-NEXT:    vzeroupper
1312; AVX512F-NEXT:    retq
1313;
1314; AVX512VL-LABEL: fptoui_4f32_to_4i32:
1315; AVX512VL:       # %bb.0:
1316; AVX512VL-NEXT:    vcvttps2udq %xmm0, %xmm0
1317; AVX512VL-NEXT:    retq
1318;
1319; AVX512DQ-LABEL: fptoui_4f32_to_4i32:
1320; AVX512DQ:       # %bb.0:
1321; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1322; AVX512DQ-NEXT:    vcvttps2udq %zmm0, %zmm0
1323; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1324; AVX512DQ-NEXT:    vzeroupper
1325; AVX512DQ-NEXT:    retq
1326;
1327; AVX512VLDQ-LABEL: fptoui_4f32_to_4i32:
1328; AVX512VLDQ:       # %bb.0:
1329; AVX512VLDQ-NEXT:    vcvttps2udq %xmm0, %xmm0
1330; AVX512VLDQ-NEXT:    retq
1331  %cvt = fptoui <4 x float> %a to <4 x i32>
1332  ret <4 x i32> %cvt
1333}
1334
; Unsigned conversion of the low two lanes of a <4 x float> to <2 x i64>.
; The shufflevector narrows the input to lanes 0 and 1 before the fptoui.
; The SSE and VEX patterns expect a per-lane scalar emulation: a float
; threshold is loaded from memory (its value is not pinned by the pattern),
; both the lane and the lane minus the threshold go through cvttss2si, the
; second result is xor'ed with 0x8000000000000000, and ucomiss + cmovae pick
; between the two.
; The AVX512F and AVX512VL runs expect scalar vcvttss2usi per lane; the
; AVX512DQ and AVX512VLDQ runs expect a packed vcvttps2uqq.
1335define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
1336; SSE-LABEL: fptoui_2f32_to_2i64:
1337; SSE:       # %bb.0:
1338; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1339; SSE-NEXT:    movaps %xmm0, %xmm1
1340; SSE-NEXT:    subss %xmm2, %xmm1
1341; SSE-NEXT:    cvttss2si %xmm1, %rax
1342; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1343; SSE-NEXT:    xorq %rcx, %rax
1344; SSE-NEXT:    cvttss2si %xmm0, %rdx
1345; SSE-NEXT:    ucomiss %xmm2, %xmm0
1346; SSE-NEXT:    cmovaeq %rax, %rdx
1347; SSE-NEXT:    movq %rdx, %xmm1
1348; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1349; SSE-NEXT:    movaps %xmm0, %xmm3
1350; SSE-NEXT:    subss %xmm2, %xmm3
1351; SSE-NEXT:    cvttss2si %xmm3, %rax
1352; SSE-NEXT:    xorq %rcx, %rax
1353; SSE-NEXT:    cvttss2si %xmm0, %rcx
1354; SSE-NEXT:    ucomiss %xmm2, %xmm0
1355; SSE-NEXT:    cmovaeq %rax, %rcx
1356; SSE-NEXT:    movq %rcx, %xmm0
1357; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1358; SSE-NEXT:    movdqa %xmm1, %xmm0
1359; SSE-NEXT:    retq
1360;
1361; VEX-LABEL: fptoui_2f32_to_2i64:
1362; VEX:       # %bb.0:
1363; VEX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1364; VEX-NEXT:    vsubss %xmm1, %xmm0, %xmm2
1365; VEX-NEXT:    vcvttss2si %xmm2, %rax
1366; VEX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1367; VEX-NEXT:    xorq %rcx, %rax
1368; VEX-NEXT:    vcvttss2si %xmm0, %rdx
1369; VEX-NEXT:    vucomiss %xmm1, %xmm0
1370; VEX-NEXT:    cmovaeq %rax, %rdx
1371; VEX-NEXT:    vmovq %rdx, %xmm2
1372; VEX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1373; VEX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
1374; VEX-NEXT:    vcvttss2si %xmm3, %rax
1375; VEX-NEXT:    xorq %rcx, %rax
1376; VEX-NEXT:    vcvttss2si %xmm0, %rcx
1377; VEX-NEXT:    vucomiss %xmm1, %xmm0
1378; VEX-NEXT:    cmovaeq %rax, %rcx
1379; VEX-NEXT:    vmovq %rcx, %xmm0
1380; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1381; VEX-NEXT:    retq
1382;
1383; AVX512F-LABEL: fptoui_2f32_to_2i64:
1384; AVX512F:       # %bb.0:
1385; AVX512F-NEXT:    vcvttss2usi %xmm0, %rax
1386; AVX512F-NEXT:    vmovq %rax, %xmm1
1387; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1388; AVX512F-NEXT:    vcvttss2usi %xmm0, %rax
1389; AVX512F-NEXT:    vmovq %rax, %xmm0
1390; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1391; AVX512F-NEXT:    retq
1392;
1393; AVX512VL-LABEL: fptoui_2f32_to_2i64:
1394; AVX512VL:       # %bb.0:
1395; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rax
1396; AVX512VL-NEXT:    vmovq %rax, %xmm1
1397; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1398; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rax
1399; AVX512VL-NEXT:    vmovq %rax, %xmm0
1400; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1401; AVX512VL-NEXT:    retq
1402;
1403; AVX512DQ-LABEL: fptoui_2f32_to_2i64:
1404; AVX512DQ:       # %bb.0:
1405; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
1406; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
1407; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1408; AVX512DQ-NEXT:    vzeroupper
1409; AVX512DQ-NEXT:    retq
1410;
1411; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64:
1412; AVX512VLDQ:       # %bb.0:
1413; AVX512VLDQ-NEXT:    vcvttps2uqq %xmm0, %xmm0
1414; AVX512VLDQ-NEXT:    retq
1415  %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1416  %cvt = fptoui <2 x float> %shuf to <2 x i64>
1417  ret <2 x i64> %cvt
1418}
1419
; Unsigned conversion where the narrowing happens after the convert: the
; whole <4 x float> is converted to <4 x i64> and the shufflevector keeps
; result lanes 0 and 1.
; The SSE and VEX patterns expect the scalar emulation for just two lanes
; (subtract a threshold loaded from memory, cvttss2si on both values, xor
; with 0x8000000000000000, ucomiss + cmovae select).
; The AVX512F and AVX512VL runs expect two scalar vcvttss2usi conversions.
; The AVX512DQ run expects vcvttps2uqq into zmm0 and the AVX512VLDQ run
; expects it into ymm0; both are followed by vzeroupper.
1420define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
1421; SSE-LABEL: fptoui_4f32_to_2i64:
1422; SSE:       # %bb.0:
1423; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1424; SSE-NEXT:    movaps %xmm0, %xmm1
1425; SSE-NEXT:    subss %xmm2, %xmm1
1426; SSE-NEXT:    cvttss2si %xmm1, %rax
1427; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1428; SSE-NEXT:    xorq %rcx, %rax
1429; SSE-NEXT:    cvttss2si %xmm0, %rdx
1430; SSE-NEXT:    ucomiss %xmm2, %xmm0
1431; SSE-NEXT:    cmovaeq %rax, %rdx
1432; SSE-NEXT:    movq %rdx, %xmm1
1433; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1434; SSE-NEXT:    movaps %xmm0, %xmm3
1435; SSE-NEXT:    subss %xmm2, %xmm3
1436; SSE-NEXT:    cvttss2si %xmm3, %rax
1437; SSE-NEXT:    xorq %rcx, %rax
1438; SSE-NEXT:    cvttss2si %xmm0, %rcx
1439; SSE-NEXT:    ucomiss %xmm2, %xmm0
1440; SSE-NEXT:    cmovaeq %rax, %rcx
1441; SSE-NEXT:    movq %rcx, %xmm0
1442; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1443; SSE-NEXT:    movdqa %xmm1, %xmm0
1444; SSE-NEXT:    retq
1445;
1446; VEX-LABEL: fptoui_4f32_to_2i64:
1447; VEX:       # %bb.0:
1448; VEX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1449; VEX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1450; VEX-NEXT:    vsubss %xmm2, %xmm1, %xmm3
1451; VEX-NEXT:    vcvttss2si %xmm3, %rax
1452; VEX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1453; VEX-NEXT:    xorq %rcx, %rax
1454; VEX-NEXT:    vcvttss2si %xmm1, %rdx
1455; VEX-NEXT:    vucomiss %xmm2, %xmm1
1456; VEX-NEXT:    cmovaeq %rax, %rdx
1457; VEX-NEXT:    vsubss %xmm2, %xmm0, %xmm1
1458; VEX-NEXT:    vcvttss2si %xmm1, %rax
1459; VEX-NEXT:    xorq %rcx, %rax
1460; VEX-NEXT:    vcvttss2si %xmm0, %rcx
1461; VEX-NEXT:    vucomiss %xmm2, %xmm0
1462; VEX-NEXT:    cmovaeq %rax, %rcx
1463; VEX-NEXT:    vmovq %rcx, %xmm0
1464; VEX-NEXT:    vmovq %rdx, %xmm1
1465; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1466; VEX-NEXT:    retq
1467;
1468; AVX512F-LABEL: fptoui_4f32_to_2i64:
1469; AVX512F:       # %bb.0:
1470; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1471; AVX512F-NEXT:    vcvttss2usi %xmm1, %rax
1472; AVX512F-NEXT:    vcvttss2usi %xmm0, %rcx
1473; AVX512F-NEXT:    vmovq %rcx, %xmm0
1474; AVX512F-NEXT:    vmovq %rax, %xmm1
1475; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1476; AVX512F-NEXT:    retq
1477;
1478; AVX512VL-LABEL: fptoui_4f32_to_2i64:
1479; AVX512VL:       # %bb.0:
1480; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1481; AVX512VL-NEXT:    vcvttss2usi %xmm1, %rax
1482; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rcx
1483; AVX512VL-NEXT:    vmovq %rcx, %xmm0
1484; AVX512VL-NEXT:    vmovq %rax, %xmm1
1485; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1486; AVX512VL-NEXT:    retq
1487;
1488; AVX512DQ-LABEL: fptoui_4f32_to_2i64:
1489; AVX512DQ:       # %bb.0:
1490; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
1491; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
1492; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1493; AVX512DQ-NEXT:    vzeroupper
1494; AVX512DQ-NEXT:    retq
1495;
1496; AVX512VLDQ-LABEL: fptoui_4f32_to_2i64:
1497; AVX512VLDQ:       # %bb.0:
1498; AVX512VLDQ-NEXT:    vcvttps2uqq %xmm0, %ymm0
1499; AVX512VLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1500; AVX512VLDQ-NEXT:    vzeroupper
1501; AVX512VLDQ-NEXT:    retq
1502  %cvt = fptoui <4 x float> %a to <4 x i64>
1503  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
1504  ret <2 x i64> %shuf
1505}
1506
; Unsigned conversion of <8 x float> to <8 x i32>.
; The SSE pattern expects the compare / subtract / cvttps2dq / xor / select
; emulation to run twice, once per 128-bit half (xmm0 and xmm1), sharing the
; 2.14748365E+9 splat in xmm4 and the 2147483648 splat in xmm5.
; The AVX1 and AVX2 patterns expect the same sequence once on ymm registers,
; with vblendvps as the select.
; The AVX512 runs expect vcvttps2udq, on zmm0 without VL and on ymm0 with VL;
; no vzeroupper is expected in any of them.
1507define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
1508; SSE-LABEL: fptoui_8f32_to_8i32:
1509; SSE:       # %bb.0:
1510; SSE-NEXT:    movaps {{.*#+}} xmm4 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1511; SSE-NEXT:    movaps %xmm0, %xmm2
1512; SSE-NEXT:    cmpltps %xmm4, %xmm2
1513; SSE-NEXT:    cvttps2dq %xmm0, %xmm3
1514; SSE-NEXT:    subps %xmm4, %xmm0
1515; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
1516; SSE-NEXT:    movaps {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
1517; SSE-NEXT:    xorps %xmm5, %xmm0
1518; SSE-NEXT:    andps %xmm2, %xmm3
1519; SSE-NEXT:    andnps %xmm0, %xmm2
1520; SSE-NEXT:    orps %xmm3, %xmm2
1521; SSE-NEXT:    movaps %xmm1, %xmm3
1522; SSE-NEXT:    cmpltps %xmm4, %xmm3
1523; SSE-NEXT:    cvttps2dq %xmm1, %xmm0
1524; SSE-NEXT:    subps %xmm4, %xmm1
1525; SSE-NEXT:    cvttps2dq %xmm1, %xmm1
1526; SSE-NEXT:    xorps %xmm5, %xmm1
1527; SSE-NEXT:    andps %xmm3, %xmm0
1528; SSE-NEXT:    andnps %xmm1, %xmm3
1529; SSE-NEXT:    orps %xmm0, %xmm3
1530; SSE-NEXT:    movaps %xmm2, %xmm0
1531; SSE-NEXT:    movaps %xmm3, %xmm1
1532; SSE-NEXT:    retq
1533;
1534; AVX1-LABEL: fptoui_8f32_to_8i32:
1535; AVX1:       # %bb.0:
1536; AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1537; AVX1-NEXT:    vcmpltps %ymm1, %ymm0, %ymm2
1538; AVX1-NEXT:    vsubps %ymm1, %ymm0, %ymm1
1539; AVX1-NEXT:    vcvttps2dq %ymm1, %ymm1
1540; AVX1-NEXT:    vxorps {{.*}}(%rip), %ymm1, %ymm1
1541; AVX1-NEXT:    vcvttps2dq %ymm0, %ymm0
1542; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
1543; AVX1-NEXT:    retq
1544;
1545; AVX2-LABEL: fptoui_8f32_to_8i32:
1546; AVX2:       # %bb.0:
1547; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1548; AVX2-NEXT:    vcmpltps %ymm1, %ymm0, %ymm2
1549; AVX2-NEXT:    vsubps %ymm1, %ymm0, %ymm1
1550; AVX2-NEXT:    vcvttps2dq %ymm1, %ymm1
1551; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
1552; AVX2-NEXT:    vxorps %ymm3, %ymm1, %ymm1
1553; AVX2-NEXT:    vcvttps2dq %ymm0, %ymm0
1554; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
1555; AVX2-NEXT:    retq
1556;
1557; AVX512F-LABEL: fptoui_8f32_to_8i32:
1558; AVX512F:       # %bb.0:
1559; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1560; AVX512F-NEXT:    vcvttps2udq %zmm0, %zmm0
1561; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1562; AVX512F-NEXT:    retq
1563;
1564; AVX512VL-LABEL: fptoui_8f32_to_8i32:
1565; AVX512VL:       # %bb.0:
1566; AVX512VL-NEXT:    vcvttps2udq %ymm0, %ymm0
1567; AVX512VL-NEXT:    retq
1568;
1569; AVX512DQ-LABEL: fptoui_8f32_to_8i32:
1570; AVX512DQ:       # %bb.0:
1571; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1572; AVX512DQ-NEXT:    vcvttps2udq %zmm0, %zmm0
1573; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1574; AVX512DQ-NEXT:    retq
1575;
1576; AVX512VLDQ-LABEL: fptoui_8f32_to_8i32:
1577; AVX512VLDQ:       # %bb.0:
1578; AVX512VLDQ-NEXT:    vcvttps2udq %ymm0, %ymm0
1579; AVX512VLDQ-NEXT:    retq
1580  %cvt = fptoui <8 x float> %a to <8 x i32>
1581  ret <8 x i32> %cvt
1582}
1583
; fptoui of the low four lanes of an <8 x float> argument (extracted with a
; shufflevector) to <4 x i64>.
; The SSE2, AVX1 and AVX2 checks expect every lane to be handled with scalar
; code that converts both the value and the value minus a constant loaded from
; memory (presumably 2^63 -- TODO confirm), xors the second result with
; 0x8000000000000000, and selects between the two with cmovae after a ucomiss
; compare against that constant. The AVX512F and AVX512VL checks expect one
; scalar vcvttss2usi per lane; the AVX512DQ and AVX512VLDQ checks expect a
; single vcvttps2uqq.
1584define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
1585; SSE-LABEL: fptoui_4f32_to_4i64:
1586; SSE:       # %bb.0:
1587; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1588; SSE-NEXT:    movaps %xmm0, %xmm2
1589; SSE-NEXT:    subss %xmm1, %xmm2
1590; SSE-NEXT:    cvttss2si %xmm2, %rcx
1591; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
1592; SSE-NEXT:    xorq %rax, %rcx
1593; SSE-NEXT:    cvttss2si %xmm0, %rdx
1594; SSE-NEXT:    ucomiss %xmm1, %xmm0
1595; SSE-NEXT:    cmovaeq %rcx, %rdx
1596; SSE-NEXT:    movq %rdx, %xmm2
1597; SSE-NEXT:    movaps %xmm0, %xmm3
1598; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
1599; SSE-NEXT:    movaps %xmm3, %xmm4
1600; SSE-NEXT:    subss %xmm1, %xmm4
1601; SSE-NEXT:    cvttss2si %xmm4, %rcx
1602; SSE-NEXT:    xorq %rax, %rcx
1603; SSE-NEXT:    cvttss2si %xmm3, %rdx
1604; SSE-NEXT:    ucomiss %xmm1, %xmm3
1605; SSE-NEXT:    cmovaeq %rcx, %rdx
1606; SSE-NEXT:    movq %rdx, %xmm3
1607; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1608; SSE-NEXT:    movaps %xmm0, %xmm3
1609; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
1610; SSE-NEXT:    movaps %xmm3, %xmm4
1611; SSE-NEXT:    subss %xmm1, %xmm4
1612; SSE-NEXT:    cvttss2si %xmm4, %rcx
1613; SSE-NEXT:    xorq %rax, %rcx
1614; SSE-NEXT:    cvttss2si %xmm3, %rdx
1615; SSE-NEXT:    ucomiss %xmm1, %xmm3
1616; SSE-NEXT:    cmovaeq %rcx, %rdx
1617; SSE-NEXT:    movq %rdx, %xmm3
1618; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
1619; SSE-NEXT:    movaps %xmm0, %xmm4
1620; SSE-NEXT:    subss %xmm1, %xmm4
1621; SSE-NEXT:    cvttss2si %xmm4, %rcx
1622; SSE-NEXT:    xorq %rax, %rcx
1623; SSE-NEXT:    cvttss2si %xmm0, %rax
1624; SSE-NEXT:    ucomiss %xmm1, %xmm0
1625; SSE-NEXT:    cmovaeq %rcx, %rax
1626; SSE-NEXT:    movq %rax, %xmm1
1627; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1628; SSE-NEXT:    movdqa %xmm2, %xmm0
1629; SSE-NEXT:    retq
1630;
1631; AVX1-LABEL: fptoui_4f32_to_4i64:
1632; AVX1:       # %bb.0:
1633; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1634; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1635; AVX1-NEXT:    vsubss %xmm1, %xmm2, %xmm3
1636; AVX1-NEXT:    vcvttss2si %xmm3, %rax
1637; AVX1-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1638; AVX1-NEXT:    xorq %rcx, %rax
1639; AVX1-NEXT:    vcvttss2si %xmm2, %rdx
1640; AVX1-NEXT:    vucomiss %xmm1, %xmm2
1641; AVX1-NEXT:    cmovaeq %rax, %rdx
1642; AVX1-NEXT:    vmovq %rdx, %xmm2
1643; AVX1-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
1644; AVX1-NEXT:    vsubss %xmm1, %xmm3, %xmm4
1645; AVX1-NEXT:    vcvttss2si %xmm4, %rax
1646; AVX1-NEXT:    xorq %rcx, %rax
1647; AVX1-NEXT:    vcvttss2si %xmm3, %rdx
1648; AVX1-NEXT:    vucomiss %xmm1, %xmm3
1649; AVX1-NEXT:    cmovaeq %rax, %rdx
1650; AVX1-NEXT:    vmovq %rdx, %xmm3
1651; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1652; AVX1-NEXT:    vsubss %xmm1, %xmm0, %xmm3
1653; AVX1-NEXT:    vcvttss2si %xmm3, %rax
1654; AVX1-NEXT:    xorq %rcx, %rax
1655; AVX1-NEXT:    vcvttss2si %xmm0, %rdx
1656; AVX1-NEXT:    vucomiss %xmm1, %xmm0
1657; AVX1-NEXT:    cmovaeq %rax, %rdx
1658; AVX1-NEXT:    vmovq %rdx, %xmm3
1659; AVX1-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1660; AVX1-NEXT:    vsubss %xmm1, %xmm0, %xmm4
1661; AVX1-NEXT:    vcvttss2si %xmm4, %rax
1662; AVX1-NEXT:    xorq %rcx, %rax
1663; AVX1-NEXT:    vcvttss2si %xmm0, %rcx
1664; AVX1-NEXT:    vucomiss %xmm1, %xmm0
1665; AVX1-NEXT:    cmovaeq %rax, %rcx
1666; AVX1-NEXT:    vmovq %rcx, %xmm0
1667; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1668; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1669; AVX1-NEXT:    retq
1670;
1671; AVX2-LABEL: fptoui_4f32_to_4i64:
1672; AVX2:       # %bb.0:
1673; AVX2-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1674; AVX2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1675; AVX2-NEXT:    vsubss %xmm1, %xmm2, %xmm3
1676; AVX2-NEXT:    vcvttss2si %xmm3, %rax
1677; AVX2-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1678; AVX2-NEXT:    xorq %rcx, %rax
1679; AVX2-NEXT:    vcvttss2si %xmm2, %rdx
1680; AVX2-NEXT:    vucomiss %xmm1, %xmm2
1681; AVX2-NEXT:    cmovaeq %rax, %rdx
1682; AVX2-NEXT:    vmovq %rdx, %xmm2
1683; AVX2-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
1684; AVX2-NEXT:    vsubss %xmm1, %xmm3, %xmm4
1685; AVX2-NEXT:    vcvttss2si %xmm4, %rax
1686; AVX2-NEXT:    xorq %rcx, %rax
1687; AVX2-NEXT:    vcvttss2si %xmm3, %rdx
1688; AVX2-NEXT:    vucomiss %xmm1, %xmm3
1689; AVX2-NEXT:    cmovaeq %rax, %rdx
1690; AVX2-NEXT:    vmovq %rdx, %xmm3
1691; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1692; AVX2-NEXT:    vsubss %xmm1, %xmm0, %xmm3
1693; AVX2-NEXT:    vcvttss2si %xmm3, %rax
1694; AVX2-NEXT:    xorq %rcx, %rax
1695; AVX2-NEXT:    vcvttss2si %xmm0, %rdx
1696; AVX2-NEXT:    vucomiss %xmm1, %xmm0
1697; AVX2-NEXT:    cmovaeq %rax, %rdx
1698; AVX2-NEXT:    vmovq %rdx, %xmm3
1699; AVX2-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1700; AVX2-NEXT:    vsubss %xmm1, %xmm0, %xmm4
1701; AVX2-NEXT:    vcvttss2si %xmm4, %rax
1702; AVX2-NEXT:    xorq %rcx, %rax
1703; AVX2-NEXT:    vcvttss2si %xmm0, %rcx
1704; AVX2-NEXT:    vucomiss %xmm1, %xmm0
1705; AVX2-NEXT:    cmovaeq %rax, %rcx
1706; AVX2-NEXT:    vmovq %rcx, %xmm0
1707; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1708; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
1709; AVX2-NEXT:    retq
1710;
1711; AVX512F-LABEL: fptoui_4f32_to_4i64:
1712; AVX512F:       # %bb.0:
1713; AVX512F-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1714; AVX512F-NEXT:    vcvttss2usi %xmm1, %rax
1715; AVX512F-NEXT:    vmovq %rax, %xmm1
1716; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1717; AVX512F-NEXT:    vcvttss2usi %xmm2, %rax
1718; AVX512F-NEXT:    vmovq %rax, %xmm2
1719; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1720; AVX512F-NEXT:    vcvttss2usi %xmm0, %rax
1721; AVX512F-NEXT:    vmovq %rax, %xmm2
1722; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1723; AVX512F-NEXT:    vcvttss2usi %xmm0, %rax
1724; AVX512F-NEXT:    vmovq %rax, %xmm0
1725; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1726; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1727; AVX512F-NEXT:    retq
1728;
1729; AVX512VL-LABEL: fptoui_4f32_to_4i64:
1730; AVX512VL:       # %bb.0:
1731; AVX512VL-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1732; AVX512VL-NEXT:    vcvttss2usi %xmm1, %rax
1733; AVX512VL-NEXT:    vmovq %rax, %xmm1
1734; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1735; AVX512VL-NEXT:    vcvttss2usi %xmm2, %rax
1736; AVX512VL-NEXT:    vmovq %rax, %xmm2
1737; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1738; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rax
1739; AVX512VL-NEXT:    vmovq %rax, %xmm2
1740; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1741; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rax
1742; AVX512VL-NEXT:    vmovq %rax, %xmm0
1743; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1744; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1745; AVX512VL-NEXT:    retq
1746;
1747; AVX512DQ-LABEL: fptoui_4f32_to_4i64:
1748; AVX512DQ:       # %bb.0:
1749; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
1750; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1751; AVX512DQ-NEXT:    retq
1752;
1753; AVX512VLDQ-LABEL: fptoui_4f32_to_4i64:
1754; AVX512VLDQ:       # %bb.0:
1755; AVX512VLDQ-NEXT:    vcvttps2uqq %xmm0, %ymm0
1756; AVX512VLDQ-NEXT:    retq
1757  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1758  %cvt = fptoui <4 x float> %shuf to <4 x i64>
1759  ret <4 x i64> %cvt
1760}
1761
; fptoui of a full <8 x float> to <8 x i64>, of which only the low four
; results are returned (selected by the trailing shufflevector).
; All check prefixes expect only four lanes to be converted. The SSE2, AVX1
; and AVX2 checks expect the scalar cvttss2si / subtract-constant / xor /
; cmovae sequence per lane (the constant is loaded from memory, presumably
; 2^63 -- TODO confirm); the AVX512F and AVX512VL checks expect four scalar
; vcvttss2usi; the AVX512DQ and AVX512VLDQ checks both expect a
; ymm-to-zmm vcvttps2uqq whose result is then narrowed to ymm0.
1762define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
1763; SSE-LABEL: fptoui_8f32_to_4i64:
1764; SSE:       # %bb.0:
1765; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1766; SSE-NEXT:    movaps %xmm0, %xmm2
1767; SSE-NEXT:    subss %xmm1, %xmm2
1768; SSE-NEXT:    cvttss2si %xmm2, %rcx
1769; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
1770; SSE-NEXT:    xorq %rax, %rcx
1771; SSE-NEXT:    cvttss2si %xmm0, %rdx
1772; SSE-NEXT:    ucomiss %xmm1, %xmm0
1773; SSE-NEXT:    cmovaeq %rcx, %rdx
1774; SSE-NEXT:    movq %rdx, %xmm2
1775; SSE-NEXT:    movaps %xmm0, %xmm3
1776; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
1777; SSE-NEXT:    movaps %xmm3, %xmm4
1778; SSE-NEXT:    subss %xmm1, %xmm4
1779; SSE-NEXT:    cvttss2si %xmm4, %rcx
1780; SSE-NEXT:    xorq %rax, %rcx
1781; SSE-NEXT:    cvttss2si %xmm3, %rdx
1782; SSE-NEXT:    ucomiss %xmm1, %xmm3
1783; SSE-NEXT:    cmovaeq %rcx, %rdx
1784; SSE-NEXT:    movq %rdx, %xmm3
1785; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1786; SSE-NEXT:    movaps %xmm0, %xmm3
1787; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
1788; SSE-NEXT:    movaps %xmm3, %xmm4
1789; SSE-NEXT:    subss %xmm1, %xmm4
1790; SSE-NEXT:    cvttss2si %xmm4, %rcx
1791; SSE-NEXT:    xorq %rax, %rcx
1792; SSE-NEXT:    cvttss2si %xmm3, %rdx
1793; SSE-NEXT:    ucomiss %xmm1, %xmm3
1794; SSE-NEXT:    cmovaeq %rcx, %rdx
1795; SSE-NEXT:    movq %rdx, %xmm3
1796; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
1797; SSE-NEXT:    movaps %xmm0, %xmm4
1798; SSE-NEXT:    subss %xmm1, %xmm4
1799; SSE-NEXT:    cvttss2si %xmm4, %rcx
1800; SSE-NEXT:    xorq %rax, %rcx
1801; SSE-NEXT:    cvttss2si %xmm0, %rax
1802; SSE-NEXT:    ucomiss %xmm1, %xmm0
1803; SSE-NEXT:    cmovaeq %rcx, %rax
1804; SSE-NEXT:    movq %rax, %xmm1
1805; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1806; SSE-NEXT:    movdqa %xmm2, %xmm0
1807; SSE-NEXT:    retq
1808;
1809; AVX1-LABEL: fptoui_8f32_to_4i64:
1810; AVX1:       # %bb.0:
1811; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1812; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1813; AVX1-NEXT:    vsubss %xmm1, %xmm2, %xmm3
1814; AVX1-NEXT:    vcvttss2si %xmm3, %rax
1815; AVX1-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1816; AVX1-NEXT:    xorq %rcx, %rax
1817; AVX1-NEXT:    vcvttss2si %xmm2, %rdx
1818; AVX1-NEXT:    vucomiss %xmm1, %xmm2
1819; AVX1-NEXT:    cmovaeq %rax, %rdx
1820; AVX1-NEXT:    vmovq %rdx, %xmm2
1821; AVX1-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
1822; AVX1-NEXT:    vsubss %xmm1, %xmm3, %xmm4
1823; AVX1-NEXT:    vcvttss2si %xmm4, %rax
1824; AVX1-NEXT:    xorq %rcx, %rax
1825; AVX1-NEXT:    vcvttss2si %xmm3, %rdx
1826; AVX1-NEXT:    vucomiss %xmm1, %xmm3
1827; AVX1-NEXT:    cmovaeq %rax, %rdx
1828; AVX1-NEXT:    vmovq %rdx, %xmm3
1829; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1830; AVX1-NEXT:    vsubss %xmm1, %xmm0, %xmm3
1831; AVX1-NEXT:    vcvttss2si %xmm3, %rax
1832; AVX1-NEXT:    xorq %rcx, %rax
1833; AVX1-NEXT:    vcvttss2si %xmm0, %rdx
1834; AVX1-NEXT:    vucomiss %xmm1, %xmm0
1835; AVX1-NEXT:    cmovaeq %rax, %rdx
1836; AVX1-NEXT:    vmovq %rdx, %xmm3
1837; AVX1-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1838; AVX1-NEXT:    vsubss %xmm1, %xmm0, %xmm4
1839; AVX1-NEXT:    vcvttss2si %xmm4, %rax
1840; AVX1-NEXT:    xorq %rcx, %rax
1841; AVX1-NEXT:    vcvttss2si %xmm0, %rcx
1842; AVX1-NEXT:    vucomiss %xmm1, %xmm0
1843; AVX1-NEXT:    cmovaeq %rax, %rcx
1844; AVX1-NEXT:    vmovq %rcx, %xmm0
1845; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1846; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1847; AVX1-NEXT:    retq
1848;
1849; AVX2-LABEL: fptoui_8f32_to_4i64:
1850; AVX2:       # %bb.0:
1851; AVX2-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1852; AVX2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1853; AVX2-NEXT:    vsubss %xmm1, %xmm2, %xmm3
1854; AVX2-NEXT:    vcvttss2si %xmm3, %rax
1855; AVX2-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1856; AVX2-NEXT:    xorq %rcx, %rax
1857; AVX2-NEXT:    vcvttss2si %xmm2, %rdx
1858; AVX2-NEXT:    vucomiss %xmm1, %xmm2
1859; AVX2-NEXT:    cmovaeq %rax, %rdx
1860; AVX2-NEXT:    vmovq %rdx, %xmm2
1861; AVX2-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
1862; AVX2-NEXT:    vsubss %xmm1, %xmm3, %xmm4
1863; AVX2-NEXT:    vcvttss2si %xmm4, %rax
1864; AVX2-NEXT:    xorq %rcx, %rax
1865; AVX2-NEXT:    vcvttss2si %xmm3, %rdx
1866; AVX2-NEXT:    vucomiss %xmm1, %xmm3
1867; AVX2-NEXT:    cmovaeq %rax, %rdx
1868; AVX2-NEXT:    vmovq %rdx, %xmm3
1869; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
1870; AVX2-NEXT:    vsubss %xmm1, %xmm0, %xmm3
1871; AVX2-NEXT:    vcvttss2si %xmm3, %rax
1872; AVX2-NEXT:    xorq %rcx, %rax
1873; AVX2-NEXT:    vcvttss2si %xmm0, %rdx
1874; AVX2-NEXT:    vucomiss %xmm1, %xmm0
1875; AVX2-NEXT:    cmovaeq %rax, %rdx
1876; AVX2-NEXT:    vmovq %rdx, %xmm3
1877; AVX2-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1878; AVX2-NEXT:    vsubss %xmm1, %xmm0, %xmm4
1879; AVX2-NEXT:    vcvttss2si %xmm4, %rax
1880; AVX2-NEXT:    xorq %rcx, %rax
1881; AVX2-NEXT:    vcvttss2si %xmm0, %rcx
1882; AVX2-NEXT:    vucomiss %xmm1, %xmm0
1883; AVX2-NEXT:    cmovaeq %rax, %rcx
1884; AVX2-NEXT:    vmovq %rcx, %xmm0
1885; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
1886; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
1887; AVX2-NEXT:    retq
1888;
1889; AVX512F-LABEL: fptoui_8f32_to_4i64:
1890; AVX512F:       # %bb.0:
1891; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1892; AVX512F-NEXT:    vcvttss2usi %xmm1, %rax
1893; AVX512F-NEXT:    vcvttss2usi %xmm0, %rcx
1894; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1895; AVX512F-NEXT:    vcvttss2usi %xmm1, %rdx
1896; AVX512F-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1897; AVX512F-NEXT:    vcvttss2usi %xmm0, %rsi
1898; AVX512F-NEXT:    vmovq %rsi, %xmm0
1899; AVX512F-NEXT:    vmovq %rdx, %xmm1
1900; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1901; AVX512F-NEXT:    vmovq %rcx, %xmm1
1902; AVX512F-NEXT:    vmovq %rax, %xmm2
1903; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1904; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
1905; AVX512F-NEXT:    retq
1906;
1907; AVX512VL-LABEL: fptoui_8f32_to_4i64:
1908; AVX512VL:       # %bb.0:
1909; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1910; AVX512VL-NEXT:    vcvttss2usi %xmm1, %rax
1911; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rcx
1912; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1913; AVX512VL-NEXT:    vcvttss2usi %xmm1, %rdx
1914; AVX512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1915; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rsi
1916; AVX512VL-NEXT:    vmovq %rsi, %xmm0
1917; AVX512VL-NEXT:    vmovq %rdx, %xmm1
1918; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1919; AVX512VL-NEXT:    vmovq %rcx, %xmm1
1920; AVX512VL-NEXT:    vmovq %rax, %xmm2
1921; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1922; AVX512VL-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
1923; AVX512VL-NEXT:    retq
1924;
1925; AVX512DQ-LABEL: fptoui_8f32_to_4i64:
1926; AVX512DQ:       # %bb.0:
1927; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
1928; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1929; AVX512DQ-NEXT:    retq
1930;
1931; AVX512VLDQ-LABEL: fptoui_8f32_to_4i64:
1932; AVX512VLDQ:       # %bb.0:
1933; AVX512VLDQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
1934; AVX512VLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1935; AVX512VLDQ-NEXT:    retq
1936  %cvt = fptoui <8 x float> %a to <8 x i64>
1937  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1938  ret <4 x i64> %shuf
1939}
1940
1941;
1942; Constant Folding
1943;
1944
; fptosi of the constant <1.0, -1.0> to <2 x i64>. The checks expect the
; conversion to be folded away, leaving one vector move of the result; -1 is
; printed as the unsigned 64-bit value 18446744073709551615.
1945define <2 x i64> @fptosi_2f64_to_2i64_const() {
1946; SSE-LABEL: fptosi_2f64_to_2i64_const:
1947; SSE:       # %bb.0:
1948; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
1949; SSE-NEXT:    retq
1950;
1951; AVX-LABEL: fptosi_2f64_to_2i64_const:
1952; AVX:       # %bb.0:
1953; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
1954; AVX-NEXT:    retq
1955  %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
1956  ret <2 x i64> %cvt
1957}
1958
; fptosi of the constant <-1.0, 1.0> to <2 x i32>, widened to <4 x i32> with
; undef upper lanes. The checks expect a single folded vector move; -1 is
; printed as 4294967295 and the undef lanes as "u".
1959define <4 x i32> @fptosi_2f64_to_2i32_const() {
1960; SSE-LABEL: fptosi_2f64_to_2i32_const:
1961; SSE:       # %bb.0:
1962; SSE-NEXT:    movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
1963; SSE-NEXT:    retq
1964;
1965; AVX-LABEL: fptosi_2f64_to_2i32_const:
1966; AVX:       # %bb.0:
1967; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u>
1968; AVX-NEXT:    retq
1969  %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
1970  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1971  ret <4 x i32> %ext
1972}
1973
; fptosi of the constant <1.0, -1.0, 2.0, -3.0> to <4 x i64>. The checks expect
; the folded result to be materialized as two xmm moves on SSE2 and as one ymm
; move on every AVX configuration; negative values are printed as unsigned
; 64-bit numbers.
1974define <4 x i64> @fptosi_4f64_to_4i64_const() {
1975; SSE-LABEL: fptosi_4f64_to_4i64_const:
1976; SSE:       # %bb.0:
1977; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
1978; SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,18446744073709551613]
1979; SSE-NEXT:    retq
1980;
1981; AVX-LABEL: fptosi_4f64_to_4i64_const:
1982; AVX:       # %bb.0:
1983; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
1984; AVX-NEXT:    retq
1985  %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
1986  ret <4 x i64> %cvt
1987}
1988
; fptosi of the constant <-1.0, 1.0, -2.0, 3.0> to <4 x i32>. The checks expect
; a single folded xmm move; negative values are printed as unsigned 32-bit
; numbers.
1989define <4 x i32> @fptosi_4f64_to_4i32_const() {
1990; SSE-LABEL: fptosi_4f64_to_4i32_const:
1991; SSE:       # %bb.0:
1992; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
1993; SSE-NEXT:    retq
1994;
1995; AVX-LABEL: fptosi_4f64_to_4i32_const:
1996; AVX:       # %bb.0:
1997; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
1998; AVX-NEXT:    retq
1999  %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
2000  ret <4 x i32> %cvt
2001}
2002
; fptoui of the constant <2.0, 4.0> to <2 x i64>. The checks expect the
; conversion to be folded into a single xmm move of [2,4].
2003define <2 x i64> @fptoui_2f64_to_2i64_const() {
2004; SSE-LABEL: fptoui_2f64_to_2i64_const:
2005; SSE:       # %bb.0:
2006; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4]
2007; SSE-NEXT:    retq
2008;
2009; AVX-LABEL: fptoui_2f64_to_2i64_const:
2010; AVX:       # %bb.0:
2011; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [2,4]
2012; AVX-NEXT:    retq
2013  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
2014  ret <2 x i64> %cvt
2015}
2016
; fptoui of the constant <2.0, 4.0> to <2 x i32>, widened to <4 x i32> with
; undef upper lanes. The checks expect a single folded xmm move.
; The %a argument is not referenced by the body.
2017define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
2018; SSE-LABEL: fptoui_2f64_to_2i32_const:
2019; SSE:       # %bb.0:
2020; SSE-NEXT:    movaps {{.*#+}} xmm0 = <2,4,u,u>
2021; SSE-NEXT:    retq
2022;
2023; AVX-LABEL: fptoui_2f64_to_2i32_const:
2024; AVX:       # %bb.0:
2025; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = <2,4,u,u>
2026; AVX-NEXT:    retq
2027  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
2028  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
2029  ret <4 x i32> %ext
2030}
2031
; fptoui of the constant <2.0, 4.0, 6.0, 8.0> to <4 x i64>. The checks expect
; the folded result as two xmm moves on SSE2 and as one ymm move on every AVX
; configuration.
; The %a argument is not referenced by the body.
2032define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) {
2033; SSE-LABEL: fptoui_4f64_to_4i64_const:
2034; SSE:       # %bb.0:
2035; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4]
2036; SSE-NEXT:    movaps {{.*#+}} xmm1 = [6,8]
2037; SSE-NEXT:    retq
2038;
2039; AVX-LABEL: fptoui_4f64_to_4i64_const:
2040; AVX:       # %bb.0:
2041; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [2,4,6,8]
2042; AVX-NEXT:    retq
2043  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
2044  ret <4 x i64> %cvt
2045}
2046
; fptoui of the constant <2.0, 4.0, 6.0, 8.0> to <4 x i32>. The checks expect
; a single folded xmm move of [2,4,6,8].
; The %a argument is not referenced by the body.
2047define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) {
2048; SSE-LABEL: fptoui_4f64_to_4i32_const:
2049; SSE:       # %bb.0:
2050; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4,6,8]
2051; SSE-NEXT:    retq
2052;
2053; AVX-LABEL: fptoui_4f64_to_4i32_const:
2054; AVX:       # %bb.0:
2055; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [2,4,6,8]
2056; AVX-NEXT:    retq
2057  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
2058  ret <4 x i32> %cvt
2059}
2060
; fptosi of the constant <1.0, -1.0, 2.0, 3.0> to <4 x i32>. The checks expect
; a single folded xmm move; -1 is printed as 4294967295.
2061define <4 x i32> @fptosi_4f32_to_4i32_const() {
2062; SSE-LABEL: fptosi_4f32_to_4i32_const:
2063; SSE:       # %bb.0:
2064; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2065; SSE-NEXT:    retq
2066;
2067; AVX-LABEL: fptosi_4f32_to_4i32_const:
2068; AVX:       # %bb.0:
2069; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2070; AVX-NEXT:    retq
2071  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
2072  ret <4 x i32> %cvt
2073}
2074
; fptosi of the constant <1.0, -1.0, 2.0, 3.0> to <4 x i64>. The checks expect
; the folded result as two xmm moves on SSE2 and as one ymm move on every AVX
; configuration; -1 is printed as 18446744073709551615.
2075define <4 x i64> @fptosi_4f32_to_4i64_const() {
2076; SSE-LABEL: fptosi_4f32_to_4i64_const:
2077; SSE:       # %bb.0:
2078; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
2079; SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,3]
2080; SSE-NEXT:    retq
2081;
2082; AVX-LABEL: fptosi_4f32_to_4i64_const:
2083; AVX:       # %bb.0:
2084; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
2085; AVX-NEXT:    retq
2086  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
2087  ret <4 x i64> %cvt
2088}
2089
; fptosi of the constant <1.0, -1.0, 2.0, 3.0, 6.0, -8.0, 2.0, -1.0> to
; <8 x i32>. The checks expect the folded result as two xmm moves on SSE2 and
; as one ymm move on every AVX configuration; negative values are printed as
; unsigned 32-bit numbers.
; The %a argument is not referenced by the body.
2090define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) {
2091; SSE-LABEL: fptosi_8f32_to_8i32_const:
2092; SSE:       # %bb.0:
2093; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
2094; SSE-NEXT:    movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
2095; SSE-NEXT:    retq
2096;
2097; AVX-LABEL: fptosi_8f32_to_8i32_const:
2098; AVX:       # %bb.0:
2099; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
2100; AVX-NEXT:    retq
2101  %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
2102  ret <8 x i32> %cvt
2103}
2104
; fptoui of the constant <1.0, 2.0, 4.0, 6.0> to <4 x i32>. The checks expect
; a single folded xmm move of [1,2,4,6].
; The %a argument is not referenced by the body.
2105define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) {
2106; SSE-LABEL: fptoui_4f32_to_4i32_const:
2107; SSE:       # %bb.0:
2108; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2,4,6]
2109; SSE-NEXT:    retq
2110;
2111; AVX-LABEL: fptoui_4f32_to_4i32_const:
2112; AVX:       # %bb.0:
2113; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,2,4,6]
2114; AVX-NEXT:    retq
2115  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
2116  ret <4 x i32> %cvt
2117}
2118
; fptoui of the constant <1.0, 2.0, 4.0, 8.0> to <4 x i64>. The checks expect
; the folded result as two xmm moves on SSE2 and as one ymm move on every AVX
; configuration.
2119define <4 x i64> @fptoui_4f32_to_4i64_const() {
2120; SSE-LABEL: fptoui_4f32_to_4i64_const:
2121; SSE:       # %bb.0:
2122; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2]
2123; SSE-NEXT:    movaps {{.*#+}} xmm1 = [4,8]
2124; SSE-NEXT:    retq
2125;
2126; AVX-LABEL: fptoui_4f32_to_4i64_const:
2127; AVX:       # %bb.0:
2128; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,4,8]
2129; AVX-NEXT:    retq
2130  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
2131  ret <4 x i64> %cvt
2132}
2133
; fptoui of the constant <1.0, 2.0, 4.0, 6.0, 8.0, 6.0, 4.0, 1.0> to <8 x i32>.
; The checks expect the folded result as two xmm moves on SSE2 and as one ymm
; move on every AVX configuration.
; The %a argument is not referenced by the body.
2134define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
2135; SSE-LABEL: fptoui_8f32_to_8i32_const:
2136; SSE:       # %bb.0:
2137; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2,4,6]
2138; SSE-NEXT:    movaps {{.*#+}} xmm1 = [8,6,4,1]
2139; SSE-NEXT:    retq
2140;
2141; AVX-LABEL: fptoui_8f32_to_8i32_const:
2142; AVX:       # %bb.0:
2143; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
2144; AVX-NEXT:    retq
2145  %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>
2146  ret <8 x i32> %cvt
2147}
2148
2149;
2150; Special Cases
2151;
2152
; fptosi of <2 x half> to <2 x i32>, widened to <4 x i32> by a shufflevector
; whose second operand is zeroinitializer (so the upper two lanes are zero).
; The two half values arrive in %edi and %esi. The SSE2 and VEX checks expect
; each one to be extended with a call to __gnu_h2f_ieee followed by a scalar
; cvttss2si, with %rbx and %rbp saved around the calls; the AVX512 checks
; expect an inline vcvtph2ps instead of the libcall. Every variant ends with a
; movq/vmovq that zeroes the upper half of the result.
2153define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
2154; SSE-LABEL: fptosi_2f16_to_4i32:
2155; SSE:       # %bb.0:
2156; SSE-NEXT:    pushq %rbp
2157; SSE-NEXT:    pushq %rbx
2158; SSE-NEXT:    pushq %rax
2159; SSE-NEXT:    movl %esi, %ebx
2160; SSE-NEXT:    movzwl %di, %edi
2161; SSE-NEXT:    callq __gnu_h2f_ieee
2162; SSE-NEXT:    cvttss2si %xmm0, %ebp
2163; SSE-NEXT:    movzwl %bx, %edi
2164; SSE-NEXT:    callq __gnu_h2f_ieee
2165; SSE-NEXT:    cvttss2si %xmm0, %eax
2166; SSE-NEXT:    movd %eax, %xmm0
2167; SSE-NEXT:    movd %ebp, %xmm1
2168; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2169; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
2170; SSE-NEXT:    addq $8, %rsp
2171; SSE-NEXT:    popq %rbx
2172; SSE-NEXT:    popq %rbp
2173; SSE-NEXT:    retq
2174;
2175; VEX-LABEL: fptosi_2f16_to_4i32:
2176; VEX:       # %bb.0:
2177; VEX-NEXT:    pushq %rbp
2178; VEX-NEXT:    pushq %rbx
2179; VEX-NEXT:    pushq %rax
2180; VEX-NEXT:    movl %esi, %ebx
2181; VEX-NEXT:    movzwl %di, %edi
2182; VEX-NEXT:    callq __gnu_h2f_ieee
2183; VEX-NEXT:    vcvttss2si %xmm0, %ebp
2184; VEX-NEXT:    movzwl %bx, %edi
2185; VEX-NEXT:    callq __gnu_h2f_ieee
2186; VEX-NEXT:    vcvttss2si %xmm0, %eax
2187; VEX-NEXT:    vmovd %eax, %xmm0
2188; VEX-NEXT:    vmovd %ebp, %xmm1
2189; VEX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2190; VEX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2191; VEX-NEXT:    addq $8, %rsp
2192; VEX-NEXT:    popq %rbx
2193; VEX-NEXT:    popq %rbp
2194; VEX-NEXT:    retq
2195;
2196; AVX512-LABEL: fptosi_2f16_to_4i32:
2197; AVX512:       # %bb.0:
2198; AVX512-NEXT:    movzwl %di, %eax
2199; AVX512-NEXT:    vmovd %eax, %xmm0
2200; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
2201; AVX512-NEXT:    vcvttss2si %xmm0, %eax
2202; AVX512-NEXT:    movzwl %si, %ecx
2203; AVX512-NEXT:    vmovd %ecx, %xmm0
2204; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
2205; AVX512-NEXT:    vcvttss2si %xmm0, %ecx
2206; AVX512-NEXT:    vmovd %ecx, %xmm0
2207; AVX512-NEXT:    vmovd %eax, %xmm1
2208; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2209; AVX512-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2210; AVX512-NEXT:    retq
2211  %cvt = fptosi <2 x half> %a to <2 x i32>
2212  %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2213  ret <4 x i32> %ext
2214}
2215
; fptosi of <2 x x86_fp80> to <2 x i32>, widened to <4 x i32> by a
; shufflevector whose second operand is zeroinitializer.
; Both operands are loaded from the stack with fldt. The SSE2 checks expect
; each conversion to be an fistpl bracketed by fnstcw/fldcw, with 0xC00 or-ed
; into the saved control word (presumably selecting round-toward-zero -- TODO
; confirm). The AVX checks expect a plain fisttpl per element. Both variants
; reload the two results from the stack and zero the upper half of the vector.
2216define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind {
2217; SSE-LABEL: fptosi_2f80_to_4i32:
2218; SSE:       # %bb.0:
2219; SSE-NEXT:    fldt {{[0-9]+}}(%rsp)
2220; SSE-NEXT:    fldt {{[0-9]+}}(%rsp)
2221; SSE-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
2222; SSE-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
2223; SSE-NEXT:    orl $3072, %eax # imm = 0xC00
2224; SSE-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
2225; SSE-NEXT:    fldcw -{{[0-9]+}}(%rsp)
2226; SSE-NEXT:    fistpl -{{[0-9]+}}(%rsp)
2227; SSE-NEXT:    fldcw -{{[0-9]+}}(%rsp)
2228; SSE-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
2229; SSE-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
2230; SSE-NEXT:    orl $3072, %eax # imm = 0xC00
2231; SSE-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
2232; SSE-NEXT:    fldcw -{{[0-9]+}}(%rsp)
2233; SSE-NEXT:    fistpl -{{[0-9]+}}(%rsp)
2234; SSE-NEXT:    fldcw -{{[0-9]+}}(%rsp)
2235; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2236; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2237; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2238; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
2239; SSE-NEXT:    retq
2240;
2241; AVX-LABEL: fptosi_2f80_to_4i32:
2242; AVX:       # %bb.0:
2243; AVX-NEXT:    fldt {{[0-9]+}}(%rsp)
2244; AVX-NEXT:    fldt {{[0-9]+}}(%rsp)
2245; AVX-NEXT:    fisttpl -{{[0-9]+}}(%rsp)
2246; AVX-NEXT:    fisttpl -{{[0-9]+}}(%rsp)
2247; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2248; AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2249; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2250; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2251; AVX-NEXT:    retq
2252  %cvt = fptosi <2 x x86_fp80> %a to <2 x i32>
2253  %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2254  ret <4 x i32> %ext
2255}
2256
; fptosi of <2 x fp128> to <2 x i32>, widened to <4 x i32> by a shufflevector
; whose second operand is zeroinitializer.
; All configurations expect one __fixtfsi libcall per element: the second
; element (%xmm1) is spilled to the stack across the first call and reloaded
; into %xmm0 for the second, while the first result is kept in %ebx. The two
; results are then interleaved and the upper half of the vector is zeroed.
2257define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
2258; SSE-LABEL: fptosi_2f128_to_4i32:
2259; SSE:       # %bb.0:
2260; SSE-NEXT:    pushq %rbx
2261; SSE-NEXT:    subq $16, %rsp
2262; SSE-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
2263; SSE-NEXT:    callq __fixtfsi
2264; SSE-NEXT:    movl %eax, %ebx
2265; SSE-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
2266; SSE-NEXT:    callq __fixtfsi
2267; SSE-NEXT:    movd %eax, %xmm0
2268; SSE-NEXT:    movd %ebx, %xmm1
2269; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2270; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
2271; SSE-NEXT:    addq $16, %rsp
2272; SSE-NEXT:    popq %rbx
2273; SSE-NEXT:    retq
2274;
2275; AVX-LABEL: fptosi_2f128_to_4i32:
2276; AVX:       # %bb.0:
2277; AVX-NEXT:    pushq %rbx
2278; AVX-NEXT:    subq $16, %rsp
2279; AVX-NEXT:    vmovaps %xmm1, (%rsp) # 16-byte Spill
2280; AVX-NEXT:    callq __fixtfsi
2281; AVX-NEXT:    movl %eax, %ebx
2282; AVX-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
2283; AVX-NEXT:    callq __fixtfsi
2284; AVX-NEXT:    vmovd %eax, %xmm0
2285; AVX-NEXT:    vmovd %ebx, %xmm1
2286; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2287; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
2288; AVX-NEXT:    addq $16, %rsp
2289; AVX-NEXT:    popq %rbx
2290; AVX-NEXT:    retq
2291  %cvt = fptosi <2 x fp128> %a to <2 x i32>
2292  %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2293  ret <4 x i32> %ext
2294}
2295
; fptosi of <2 x float> to <2 x i8>.
; Every configuration expects a packed cvttps2dq to i32 followed by a
; narrowing step. SSE2 expects a pand with a memory operand and two packuswb;
; the VEX, AVX512F and AVX512DQ checks expect a vpshufb that picks bytes 0 and
; 4; the AVX512VL and AVX512VLDQ checks expect vpmovdb.
2296define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
2297; SSE-LABEL: fptosi_2f32_to_2i8:
2298; SSE:       # %bb.0:
2299; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
2300; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
2301; SSE-NEXT:    packuswb %xmm0, %xmm0
2302; SSE-NEXT:    packuswb %xmm0, %xmm0
2303; SSE-NEXT:    retq
2304;
2305; VEX-LABEL: fptosi_2f32_to_2i8:
2306; VEX:       # %bb.0:
2307; VEX-NEXT:    vcvttps2dq %xmm0, %xmm0
2308; VEX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2309; VEX-NEXT:    retq
2310;
2311; AVX512F-LABEL: fptosi_2f32_to_2i8:
2312; AVX512F:       # %bb.0:
2313; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
2314; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2315; AVX512F-NEXT:    retq
2316;
2317; AVX512VL-LABEL: fptosi_2f32_to_2i8:
2318; AVX512VL:       # %bb.0:
2319; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
2320; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
2321; AVX512VL-NEXT:    retq
2322;
2323; AVX512DQ-LABEL: fptosi_2f32_to_2i8:
2324; AVX512DQ:       # %bb.0:
2325; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2326; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2327; AVX512DQ-NEXT:    retq
2328;
2329; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8:
2330; AVX512VLDQ:       # %bb.0:
2331; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2332; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
2333; AVX512VLDQ-NEXT:    retq
2334  %cvt = fptosi <2 x float> %a to <2 x i8>
2335  ret <2 x i8> %cvt
2336}
2337
; fptosi of <2 x float> to <2 x i16>.
; Every configuration expects a packed cvttps2dq to i32 followed by a pshuflw
; that moves 16-bit word 2 (the low half of the second i32) next to word 0.
2338define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
2339; SSE-LABEL: fptosi_2f32_to_2i16:
2340; SSE:       # %bb.0:
2341; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
2342; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2343; SSE-NEXT:    retq
2344;
2345; AVX-LABEL: fptosi_2f32_to_2i16:
2346; AVX:       # %bb.0:
2347; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
2348; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2349; AVX-NEXT:    retq
2350  %cvt = fptosi <2 x float> %a to <2 x i16>
2351  ret <2 x i16> %cvt
2352}
2353
; fptoui of <2 x float> to <2 x i8>.
; The checks expect the signed packed conversion cvttps2dq to be used even
; though the IR operation is unsigned, followed by a narrowing step. SSE2
; expects a pand with a memory operand and two packuswb; the VEX, AVX512F and
; AVX512DQ checks expect a vpshufb that picks bytes 0 and 4; the AVX512VL and
; AVX512VLDQ checks expect vpmovdb.
2354define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
2355; SSE-LABEL: fptoui_2f32_to_2i8:
2356; SSE:       # %bb.0:
2357; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
2358; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
2359; SSE-NEXT:    packuswb %xmm0, %xmm0
2360; SSE-NEXT:    packuswb %xmm0, %xmm0
2361; SSE-NEXT:    retq
2362;
2363; VEX-LABEL: fptoui_2f32_to_2i8:
2364; VEX:       # %bb.0:
2365; VEX-NEXT:    vcvttps2dq %xmm0, %xmm0
2366; VEX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2367; VEX-NEXT:    retq
2368;
2369; AVX512F-LABEL: fptoui_2f32_to_2i8:
2370; AVX512F:       # %bb.0:
2371; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
2372; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2373; AVX512F-NEXT:    retq
2374;
2375; AVX512VL-LABEL: fptoui_2f32_to_2i8:
2376; AVX512VL:       # %bb.0:
2377; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
2378; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
2379; AVX512VL-NEXT:    retq
2380;
2381; AVX512DQ-LABEL: fptoui_2f32_to_2i8:
2382; AVX512DQ:       # %bb.0:
2383; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2384; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2385; AVX512DQ-NEXT:    retq
2386;
2387; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8:
2388; AVX512VLDQ:       # %bb.0:
2389; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
2390; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
2391; AVX512VLDQ-NEXT:    retq
2392  %cvt = fptoui <2 x float> %a to <2 x i8>
2393  ret <2 x i8> %cvt
2394}
2395
; Unsigned conversion of <2 x float> to <2 x i16>.
; Both the SSE block and the shared AVX block expect a packed cvttps2dq
; followed by a pshuflw/vpshuflw that moves words 0 and 2 (the low halves of
; the two converted dwords) into the two lowest word lanes.
2396define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
2397; SSE-LABEL: fptoui_2f32_to_2i16:
2398; SSE:       # %bb.0:
2399; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
2400; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2401; SSE-NEXT:    retq
2402;
2403; AVX-LABEL: fptoui_2f32_to_2i16:
2404; AVX:       # %bb.0:
2405; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
2406; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2407; AVX-NEXT:    retq
2408  %cvt = fptoui <2 x float> %a to <2 x i16>
2409  ret <2 x i16> %cvt
2410}
2411
; Signed conversion of <2 x double> to <2 x i8>.
; Every check block starts with a packed cvttpd2dq and differs only in how the
; low byte of each converted dword is gathered into byte lanes 0 and 1
;   - the SSE block masks with a constant-pool andpd and applies packuswb twice
;   - the VEX, AVX512F and AVX512DQ blocks use a single vpshufb selecting
;     bytes 0 and 4
;   - the AVX512VL and AVX512VLDQ blocks use the vpmovdb truncating move
2412define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
2413; SSE-LABEL: fptosi_2f64_to_2i8:
2414; SSE:       # %bb.0:
2415; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
2416; SSE-NEXT:    andpd {{.*}}(%rip), %xmm0
2417; SSE-NEXT:    packuswb %xmm0, %xmm0
2418; SSE-NEXT:    packuswb %xmm0, %xmm0
2419; SSE-NEXT:    retq
2420;
2421; VEX-LABEL: fptosi_2f64_to_2i8:
2422; VEX:       # %bb.0:
2423; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm0
2424; VEX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2425; VEX-NEXT:    retq
2426;
2427; AVX512F-LABEL: fptosi_2f64_to_2i8:
2428; AVX512F:       # %bb.0:
2429; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
2430; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2431; AVX512F-NEXT:    retq
2432;
2433; AVX512VL-LABEL: fptosi_2f64_to_2i8:
2434; AVX512VL:       # %bb.0:
2435; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
2436; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
2437; AVX512VL-NEXT:    retq
2438;
2439; AVX512DQ-LABEL: fptosi_2f64_to_2i8:
2440; AVX512DQ:       # %bb.0:
2441; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2442; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2443; AVX512DQ-NEXT:    retq
2444;
2445; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8:
2446; AVX512VLDQ:       # %bb.0:
2447; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2448; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
2449; AVX512VLDQ-NEXT:    retq
2450  %cvt = fptosi <2 x double> %a to <2 x i8>
2451  ret <2 x i8> %cvt
2452}
2453
; Signed conversion of <2 x double> to <2 x i16>.
; Both the SSE block and the shared AVX block expect a packed cvttpd2dq
; followed by a pshuflw/vpshuflw that moves words 0 and 2 (the low halves of
; the two converted dwords) into the two lowest word lanes.
2454define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
2455; SSE-LABEL: fptosi_2f64_to_2i16:
2456; SSE:       # %bb.0:
2457; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
2458; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2459; SSE-NEXT:    retq
2460;
2461; AVX-LABEL: fptosi_2f64_to_2i16:
2462; AVX:       # %bb.0:
2463; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
2464; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2465; AVX-NEXT:    retq
2466  %cvt = fptosi <2 x double> %a to <2 x i16>
2467  ret <2 x i16> %cvt
2468}
2469
; Unsigned conversion of <2 x double> to <2 x i8>.
; Every check block starts with a packed cvttpd2dq and differs only in how the
; low byte of each converted dword is gathered into byte lanes 0 and 1
;   - the SSE block masks with a constant-pool andpd and applies packuswb twice
;   - the VEX, AVX512F and AVX512DQ blocks use a single vpshufb selecting
;     bytes 0 and 4
;   - the AVX512VL and AVX512VLDQ blocks use the vpmovdb truncating move
2470define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
2471; SSE-LABEL: fptoui_2f64_to_2i8:
2472; SSE:       # %bb.0:
2473; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
2474; SSE-NEXT:    andpd {{.*}}(%rip), %xmm0
2475; SSE-NEXT:    packuswb %xmm0, %xmm0
2476; SSE-NEXT:    packuswb %xmm0, %xmm0
2477; SSE-NEXT:    retq
2478;
2479; VEX-LABEL: fptoui_2f64_to_2i8:
2480; VEX:       # %bb.0:
2481; VEX-NEXT:    vcvttpd2dq %xmm0, %xmm0
2482; VEX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2483; VEX-NEXT:    retq
2484;
2485; AVX512F-LABEL: fptoui_2f64_to_2i8:
2486; AVX512F:       # %bb.0:
2487; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
2488; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2489; AVX512F-NEXT:    retq
2490;
2491; AVX512VL-LABEL: fptoui_2f64_to_2i8:
2492; AVX512VL:       # %bb.0:
2493; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
2494; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
2495; AVX512VL-NEXT:    retq
2496;
2497; AVX512DQ-LABEL: fptoui_2f64_to_2i8:
2498; AVX512DQ:       # %bb.0:
2499; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2500; AVX512DQ-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
2501; AVX512DQ-NEXT:    retq
2502;
2503; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8:
2504; AVX512VLDQ:       # %bb.0:
2505; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
2506; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
2507; AVX512VLDQ-NEXT:    retq
2508  %cvt = fptoui <2 x double> %a to <2 x i8>
2509  ret <2 x i8> %cvt
2510}
2511
; Unsigned conversion of <2 x double> to <2 x i16>.
; Both the SSE block and the shared AVX block expect a packed cvttpd2dq
; followed by a pshuflw/vpshuflw that moves words 0 and 2 (the low halves of
; the two converted dwords) into the two lowest word lanes.
2512define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
2513; SSE-LABEL: fptoui_2f64_to_2i16:
2514; SSE:       # %bb.0:
2515; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
2516; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2517; SSE-NEXT:    retq
2518;
2519; AVX-LABEL: fptoui_2f64_to_2i16:
2520; AVX:       # %bb.0:
2521; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
2522; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2523; AVX-NEXT:    retq
2524  %cvt = fptoui <2 x double> %a to <2 x i16>
2525  ret <2 x i16> %cvt
2526}
2527
; Signed conversion of <8 x double> to <8 x i16>.
; Expected lowering per check block
;   - SSE converts the four xmm inputs with cvttpd2dq, joins each pair with
;     unpcklpd and narrows with a single packssdw
;   - VEX converts the two ymm inputs with vcvttpd2dq and narrows with
;     vpackssdw
;   - the AVX512 blocks convert the whole zmm input at once and truncate with
;     vpmovdw; without avx512vl the truncate is the zmm to ymm form and the
;     result is taken from xmm0 (see the kill annotation), with avx512vl it is
;     the ymm to xmm form
; Every AVX-family block ends with vzeroupper before retq.
2528define <8 x i16> @fptosi_8f64_to_8i16(<8 x double> %a) {
2529; SSE-LABEL: fptosi_8f64_to_8i16:
2530; SSE:       # %bb.0:
2531; SSE-NEXT:    cvttpd2dq %xmm3, %xmm3
2532; SSE-NEXT:    cvttpd2dq %xmm2, %xmm2
2533; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2534; SSE-NEXT:    cvttpd2dq %xmm1, %xmm1
2535; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
2536; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2537; SSE-NEXT:    packssdw %xmm2, %xmm0
2538; SSE-NEXT:    retq
2539;
2540; VEX-LABEL: fptosi_8f64_to_8i16:
2541; VEX:       # %bb.0:
2542; VEX-NEXT:    vcvttpd2dq %ymm1, %xmm1
2543; VEX-NEXT:    vcvttpd2dq %ymm0, %xmm0
2544; VEX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
2545; VEX-NEXT:    vzeroupper
2546; VEX-NEXT:    retq
2547;
2548; AVX512F-LABEL: fptosi_8f64_to_8i16:
2549; AVX512F:       # %bb.0:
2550; AVX512F-NEXT:    vcvttpd2dq %zmm0, %ymm0
2551; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
2552; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
2553; AVX512F-NEXT:    vzeroupper
2554; AVX512F-NEXT:    retq
2555;
2556; AVX512VL-LABEL: fptosi_8f64_to_8i16:
2557; AVX512VL:       # %bb.0:
2558; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
2559; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
2560; AVX512VL-NEXT:    vzeroupper
2561; AVX512VL-NEXT:    retq
2562;
2563; AVX512DQ-LABEL: fptosi_8f64_to_8i16:
2564; AVX512DQ:       # %bb.0:
2565; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
2566; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
2567; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
2568; AVX512DQ-NEXT:    vzeroupper
2569; AVX512DQ-NEXT:    retq
2570;
2571; AVX512VLDQ-LABEL: fptosi_8f64_to_8i16:
2572; AVX512VLDQ:       # %bb.0:
2573; AVX512VLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
2574; AVX512VLDQ-NEXT:    vpmovdw %ymm0, %xmm0
2575; AVX512VLDQ-NEXT:    vzeroupper
2576; AVX512VLDQ-NEXT:    retq
2577  %cvt = fptosi <8 x double> %a to <8 x i16>
2578  ret <8 x i16> %cvt
2579}
2580
; Unsigned conversion of <8 x double> to <8 x i16>.
; Expected lowering per check block
;   - SSE converts the four xmm inputs with cvttpd2dq, joins each pair with
;     unpcklpd, then extracts the low word of every dword with a
;     pshuflw/pshufhw/pshufd sequence per half and merges the halves with
;     punpcklqdq (no pack instruction is used in this block)
;   - VEX converts the two ymm inputs with vcvttpd2dq and narrows with
;     vpackusdw
;   - the AVX512 blocks convert the whole zmm input with vcvttpd2dq and
;     truncate with vpmovdw; without avx512vl the truncate is the zmm to ymm
;     form and the result is taken from xmm0 (see the kill annotation), with
;     avx512vl it is the ymm to xmm form
; Every AVX-family block ends with vzeroupper before retq.
2581define <8 x i16> @fptoui_8f64_to_8i16(<8 x double> %a) {
2582; SSE-LABEL: fptoui_8f64_to_8i16:
2583; SSE:       # %bb.0:
2584; SSE-NEXT:    cvttpd2dq %xmm3, %xmm3
2585; SSE-NEXT:    cvttpd2dq %xmm2, %xmm2
2586; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2587; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
2588; SSE-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
2589; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2590; SSE-NEXT:    cvttpd2dq %xmm1, %xmm1
2591; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
2592; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2593; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2594; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
2595; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2596; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2597; SSE-NEXT:    retq
2598;
2599; VEX-LABEL: fptoui_8f64_to_8i16:
2600; VEX:       # %bb.0:
2601; VEX-NEXT:    vcvttpd2dq %ymm1, %xmm1
2602; VEX-NEXT:    vcvttpd2dq %ymm0, %xmm0
2603; VEX-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
2604; VEX-NEXT:    vzeroupper
2605; VEX-NEXT:    retq
2606;
2607; AVX512F-LABEL: fptoui_8f64_to_8i16:
2608; AVX512F:       # %bb.0:
2609; AVX512F-NEXT:    vcvttpd2dq %zmm0, %ymm0
2610; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
2611; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
2612; AVX512F-NEXT:    vzeroupper
2613; AVX512F-NEXT:    retq
2614;
2615; AVX512VL-LABEL: fptoui_8f64_to_8i16:
2616; AVX512VL:       # %bb.0:
2617; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
2618; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
2619; AVX512VL-NEXT:    vzeroupper
2620; AVX512VL-NEXT:    retq
2621;
2622; AVX512DQ-LABEL: fptoui_8f64_to_8i16:
2623; AVX512DQ:       # %bb.0:
2624; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
2625; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
2626; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
2627; AVX512DQ-NEXT:    vzeroupper
2628; AVX512DQ-NEXT:    retq
2629;
2630; AVX512VLDQ-LABEL: fptoui_8f64_to_8i16:
2631; AVX512VLDQ:       # %bb.0:
2632; AVX512VLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
2633; AVX512VLDQ-NEXT:    vpmovdw %ymm0, %xmm0
2634; AVX512VLDQ-NEXT:    vzeroupper
2635; AVX512VLDQ-NEXT:    retq
2636  %cvt = fptoui <8 x double> %a to <8 x i16>
2637  ret <8 x i16> %cvt
2638}
2639
; Signed conversion of <16 x float> to <16 x i8>.
; Expected lowering per check block
;   - SSE converts the four xmm inputs with cvttps2dq, narrows each pair to
;     words with packssdw and then narrows to bytes with packsswb
;   - AVX1 and AVX2 convert the two ymm inputs, split each result with a
;     128-bit extract (vextractf128 on AVX1, vextracti128 on AVX2), and apply
;     the same vpackssdw / vpacksswb narrowing
;   - the shared AVX512 block converts the whole zmm input and truncates with
;     one vpmovdb
; Every AVX-family block ends with vzeroupper before retq.
2640define <16 x i8> @fptosi_16f32_to_16i8(<16 x float> %a) {
2641; SSE-LABEL: fptosi_16f32_to_16i8:
2642; SSE:       # %bb.0:
2643; SSE-NEXT:    cvttps2dq %xmm3, %xmm3
2644; SSE-NEXT:    cvttps2dq %xmm2, %xmm2
2645; SSE-NEXT:    packssdw %xmm3, %xmm2
2646; SSE-NEXT:    cvttps2dq %xmm1, %xmm1
2647; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
2648; SSE-NEXT:    packssdw %xmm1, %xmm0
2649; SSE-NEXT:    packsswb %xmm2, %xmm0
2650; SSE-NEXT:    retq
2651;
2652; AVX1-LABEL: fptosi_16f32_to_16i8:
2653; AVX1:       # %bb.0:
2654; AVX1-NEXT:    vcvttps2dq %ymm1, %ymm1
2655; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2656; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
2657; AVX1-NEXT:    vcvttps2dq %ymm0, %ymm0
2658; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
2659; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
2660; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
2661; AVX1-NEXT:    vzeroupper
2662; AVX1-NEXT:    retq
2663;
2664; AVX2-LABEL: fptosi_16f32_to_16i8:
2665; AVX2:       # %bb.0:
2666; AVX2-NEXT:    vcvttps2dq %ymm1, %ymm1
2667; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
2668; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
2669; AVX2-NEXT:    vcvttps2dq %ymm0, %ymm0
2670; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
2671; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
2672; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
2673; AVX2-NEXT:    vzeroupper
2674; AVX2-NEXT:    retq
2675;
2676; AVX512-LABEL: fptosi_16f32_to_16i8:
2677; AVX512:       # %bb.0:
2678; AVX512-NEXT:    vcvttps2dq %zmm0, %zmm0
2679; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
2680; AVX512-NEXT:    vzeroupper
2681; AVX512-NEXT:    retq
2682  %cvt = fptosi <16 x float> %a to <16 x i8>
2683  ret <16 x i8> %cvt
2684}
2685
; Unsigned conversion of <16 x float> to <16 x i8>.
; Expected lowering per check block
;   - SSE converts the four xmm inputs with cvttps2dq, narrows each pair to
;     words with packssdw and then narrows to bytes with packuswb
;   - AVX1 and AVX2 convert the two ymm inputs, split each result with a
;     128-bit extract (vextractf128 on AVX1, vextracti128 on AVX2), and apply
;     the same vpackssdw / vpackuswb narrowing
;   - the shared AVX512 block converts the whole zmm input with vcvttps2dq and
;     truncates with one vpmovdb
; Every AVX-family block ends with vzeroupper before retq.
2686define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
2687; SSE-LABEL: fptoui_16f32_to_16i8:
2688; SSE:       # %bb.0:
2689; SSE-NEXT:    cvttps2dq %xmm3, %xmm3
2690; SSE-NEXT:    cvttps2dq %xmm2, %xmm2
2691; SSE-NEXT:    packssdw %xmm3, %xmm2
2692; SSE-NEXT:    cvttps2dq %xmm1, %xmm1
2693; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
2694; SSE-NEXT:    packssdw %xmm1, %xmm0
2695; SSE-NEXT:    packuswb %xmm2, %xmm0
2696; SSE-NEXT:    retq
2697;
2698; AVX1-LABEL: fptoui_16f32_to_16i8:
2699; AVX1:       # %bb.0:
2700; AVX1-NEXT:    vcvttps2dq %ymm1, %ymm1
2701; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
2702; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
2703; AVX1-NEXT:    vcvttps2dq %ymm0, %ymm0
2704; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
2705; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
2706; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
2707; AVX1-NEXT:    vzeroupper
2708; AVX1-NEXT:    retq
2709;
2710; AVX2-LABEL: fptoui_16f32_to_16i8:
2711; AVX2:       # %bb.0:
2712; AVX2-NEXT:    vcvttps2dq %ymm1, %ymm1
2713; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
2714; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
2715; AVX2-NEXT:    vcvttps2dq %ymm0, %ymm0
2716; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
2717; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
2718; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
2719; AVX2-NEXT:    vzeroupper
2720; AVX2-NEXT:    retq
2721;
2722; AVX512-LABEL: fptoui_16f32_to_16i8:
2723; AVX512:       # %bb.0:
2724; AVX512-NEXT:    vcvttps2dq %zmm0, %zmm0
2725; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
2726; AVX512-NEXT:    vzeroupper
2727; AVX512-NEXT:    retq
2728  %cvt = fptoui <16 x float> %a to <16 x i8>
2729  ret <16 x i8> %cvt
2730}
2731
; Loads a <2 x float> through %x and converts it (signed) to <2 x i64>.
; Expected lowering per check block
;   - SSE, VEX, AVX512F and AVX512VL load both floats with one movsd/vmovsd,
;     convert each element separately with the scalar cvttss2si into rax, and
;     rebuild the vector with movq + punpcklqdq
;   - AVX512DQ keeps the vmovsd load but converts with the packed vcvttps2qq
;     on the widened ymm to zmm form, returning the low xmm (see the kill
;     annotation) after a vzeroupper
;   - AVX512VLDQ folds the load into a single vcvttps2qq with a memory operand
2732define <2 x i64> @fptosi_2f32_to_2i64_load(<2 x float>* %x) {
2733; SSE-LABEL: fptosi_2f32_to_2i64_load:
2734; SSE:       # %bb.0:
2735; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
2736; SSE-NEXT:    cvttss2si %xmm1, %rax
2737; SSE-NEXT:    movq %rax, %xmm0
2738; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2739; SSE-NEXT:    cvttss2si %xmm1, %rax
2740; SSE-NEXT:    movq %rax, %xmm1
2741; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2742; SSE-NEXT:    retq
2743;
2744; VEX-LABEL: fptosi_2f32_to_2i64_load:
2745; VEX:       # %bb.0:
2746; VEX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2747; VEX-NEXT:    vcvttss2si %xmm0, %rax
2748; VEX-NEXT:    vmovq %rax, %xmm1
2749; VEX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2750; VEX-NEXT:    vcvttss2si %xmm0, %rax
2751; VEX-NEXT:    vmovq %rax, %xmm0
2752; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2753; VEX-NEXT:    retq
2754;
2755; AVX512F-LABEL: fptosi_2f32_to_2i64_load:
2756; AVX512F:       # %bb.0:
2757; AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2758; AVX512F-NEXT:    vcvttss2si %xmm0, %rax
2759; AVX512F-NEXT:    vmovq %rax, %xmm1
2760; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2761; AVX512F-NEXT:    vcvttss2si %xmm0, %rax
2762; AVX512F-NEXT:    vmovq %rax, %xmm0
2763; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2764; AVX512F-NEXT:    retq
2765;
2766; AVX512VL-LABEL: fptosi_2f32_to_2i64_load:
2767; AVX512VL:       # %bb.0:
2768; AVX512VL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2769; AVX512VL-NEXT:    vcvttss2si %xmm0, %rax
2770; AVX512VL-NEXT:    vmovq %rax, %xmm1
2771; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2772; AVX512VL-NEXT:    vcvttss2si %xmm0, %rax
2773; AVX512VL-NEXT:    vmovq %rax, %xmm0
2774; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2775; AVX512VL-NEXT:    retq
2776;
2777; AVX512DQ-LABEL: fptosi_2f32_to_2i64_load:
2778; AVX512DQ:       # %bb.0:
2779; AVX512DQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2780; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
2781; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2782; AVX512DQ-NEXT:    vzeroupper
2783; AVX512DQ-NEXT:    retq
2784;
2785; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64_load:
2786; AVX512VLDQ:       # %bb.0:
2787; AVX512VLDQ-NEXT:    vcvttps2qq (%rdi), %xmm0
2788; AVX512VLDQ-NEXT:    retq
2789  %a = load <2 x float>, <2 x float>* %x
2790  %b = fptosi <2 x float> %a to <2 x i64>
2791  ret <2 x i64> %b
2792}
2793
; Loads a <2 x float> through %x and converts it (unsigned) to <2 x i64>.
; Expected lowering per check block
;   - SSE and VEX have no unsigned convert, so each element is converted twice
;     with the signed cvttss2si, once directly and once after subtracting a
;     float constant loaded from memory, with the second result xor-ed with
;     0x8000000000000000. A ucomiss against the same constant plus cmovae then
;     picks the adjusted result when the input is at or above the constant.
;     NOTE(review) the constant is presumably 2^63, but its value is not
;     visible in these checks.
;   - AVX512F and AVX512VL convert each element with the scalar vcvttss2usi
;   - AVX512DQ keeps the vmovsd load but converts with the packed vcvttps2uqq
;     on the widened ymm to zmm form, returning the low xmm (see the kill
;     annotation) after a vzeroupper
;   - AVX512VLDQ folds the load into a single vcvttps2uqq with a memory operand
2794define <2 x i64> @fptoui_2f32_to_2i64_load(<2 x float>* %x) {
2795; SSE-LABEL: fptoui_2f32_to_2i64_load:
2796; SSE:       # %bb.0:
2797; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
2798; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
2799; SSE-NEXT:    movaps %xmm1, %xmm0
2800; SSE-NEXT:    subss %xmm2, %xmm0
2801; SSE-NEXT:    cvttss2si %xmm0, %rax
2802; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
2803; SSE-NEXT:    xorq %rcx, %rax
2804; SSE-NEXT:    cvttss2si %xmm1, %rdx
2805; SSE-NEXT:    ucomiss %xmm2, %xmm1
2806; SSE-NEXT:    cmovaeq %rax, %rdx
2807; SSE-NEXT:    movq %rdx, %xmm0
2808; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2809; SSE-NEXT:    movaps %xmm1, %xmm3
2810; SSE-NEXT:    subss %xmm2, %xmm3
2811; SSE-NEXT:    cvttss2si %xmm3, %rax
2812; SSE-NEXT:    xorq %rcx, %rax
2813; SSE-NEXT:    cvttss2si %xmm1, %rcx
2814; SSE-NEXT:    ucomiss %xmm2, %xmm1
2815; SSE-NEXT:    cmovaeq %rax, %rcx
2816; SSE-NEXT:    movq %rcx, %xmm1
2817; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2818; SSE-NEXT:    retq
2819;
2820; VEX-LABEL: fptoui_2f32_to_2i64_load:
2821; VEX:       # %bb.0:
2822; VEX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2823; VEX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2824; VEX-NEXT:    vsubss %xmm1, %xmm0, %xmm2
2825; VEX-NEXT:    vcvttss2si %xmm2, %rax
2826; VEX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
2827; VEX-NEXT:    xorq %rcx, %rax
2828; VEX-NEXT:    vcvttss2si %xmm0, %rdx
2829; VEX-NEXT:    vucomiss %xmm1, %xmm0
2830; VEX-NEXT:    cmovaeq %rax, %rdx
2831; VEX-NEXT:    vmovq %rdx, %xmm2
2832; VEX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2833; VEX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
2834; VEX-NEXT:    vcvttss2si %xmm3, %rax
2835; VEX-NEXT:    xorq %rcx, %rax
2836; VEX-NEXT:    vcvttss2si %xmm0, %rcx
2837; VEX-NEXT:    vucomiss %xmm1, %xmm0
2838; VEX-NEXT:    cmovaeq %rax, %rcx
2839; VEX-NEXT:    vmovq %rcx, %xmm0
2840; VEX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
2841; VEX-NEXT:    retq
2842;
2843; AVX512F-LABEL: fptoui_2f32_to_2i64_load:
2844; AVX512F:       # %bb.0:
2845; AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2846; AVX512F-NEXT:    vcvttss2usi %xmm0, %rax
2847; AVX512F-NEXT:    vmovq %rax, %xmm1
2848; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2849; AVX512F-NEXT:    vcvttss2usi %xmm0, %rax
2850; AVX512F-NEXT:    vmovq %rax, %xmm0
2851; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2852; AVX512F-NEXT:    retq
2853;
2854; AVX512VL-LABEL: fptoui_2f32_to_2i64_load:
2855; AVX512VL:       # %bb.0:
2856; AVX512VL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2857; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rax
2858; AVX512VL-NEXT:    vmovq %rax, %xmm1
2859; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
2860; AVX512VL-NEXT:    vcvttss2usi %xmm0, %rax
2861; AVX512VL-NEXT:    vmovq %rax, %xmm0
2862; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2863; AVX512VL-NEXT:    retq
2864;
2865; AVX512DQ-LABEL: fptoui_2f32_to_2i64_load:
2866; AVX512DQ:       # %bb.0:
2867; AVX512DQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2868; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
2869; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2870; AVX512DQ-NEXT:    vzeroupper
2871; AVX512DQ-NEXT:    retq
2872;
2873; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64_load:
2874; AVX512VLDQ:       # %bb.0:
2875; AVX512VLDQ-NEXT:    vcvttps2uqq (%rdi), %xmm0
2876; AVX512VLDQ-NEXT:    retq
2877  %a = load <2 x float>, <2 x float>* %x
2878  %b = fptoui <2 x float> %a to <2 x i64>
2879  ret <2 x i64> %b
2880}
2881