• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-32
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-64
4; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-32
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
6; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
8; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ
10; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQVL
11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQVL
12
13
; Declarations of the constrained (strict) fptosi/fptoui intrinsics referenced by this test.
; Every intrinsic takes the source vector plus one metadata operand (the calls
; visible in this file pass !"fpexcept.strict" there).
; Sources are <4 x double> and <8 x float>, plus <4 x float> for the
; v4f32 -> v4i64 and v4f32 -> v4i8 forms; results range from i64 down to i1 elements.
14declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double>, metadata)
15declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(<4 x double>, metadata)
16declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float>, metadata)
17declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float>, metadata)
18declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(<4 x double>, metadata)
19declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double>, metadata)
20declare <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f64(<4 x double>, metadata)
21declare <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f64(<4 x double>, metadata)
22declare <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f64(<4 x double>, metadata)
23declare <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f64(<4 x double>, metadata)
24declare <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f32(<4 x float>, metadata)
25declare <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f32(<4 x float>, metadata)
26declare <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f64(<4 x double>, metadata)
27declare <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f64(<4 x double>, metadata)
28declare <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f32(<8 x float>, metadata)
29declare <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f32(<8 x float>, metadata)
30declare <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f32(<8 x float>, metadata)
31declare <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f32(<8 x float>, metadata)
32declare <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f32(<8 x float>, metadata)
33declare <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f32(<8 x float>, metadata)
34declare <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f32(<8 x float>, metadata)
35declare <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f32(<8 x float>, metadata)
36
; Strict signed conversion <4 x double> -> <4 x i64> (constrained fptosi, fpexcept.strict).
; Lowering expected by the autogenerated assertions in this function:
;   - AVX, AVX512F and AVX512VL on the 32-bit triple - every element is stored to
;     the stack, converted with x87 fldl + fisttpll, and the vector is rebuilt
;     with vmovd/vpinsrd and a 128-bit insert.
;   - the same feature sets on the 64-bit triple - four scalar vcvttsd2si
;     conversions recombined with vpunpcklqdq and a 128-bit insert.
;   - AVX512DQ without VL - one vcvttpd2qq on zmm0, preceded by
;     vmovaps %ymm0, %ymm0 (presumably to clear the upper zmm half - confirm).
;   - AVX512DQ with VL - a single vcvttpd2qq on ymm0.
; Do not hand-edit the assertion lines; regenerate them with the script named
; in the note at the top of the file.
37define <4 x i64> @strict_vector_fptosi_v4f64_to_v4i64(<4 x double> %a) #0 {
38; AVX-32-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
39; AVX-32:       # %bb.0:
40; AVX-32-NEXT:    pushl %ebp
41; AVX-32-NEXT:    .cfi_def_cfa_offset 8
42; AVX-32-NEXT:    .cfi_offset %ebp, -8
43; AVX-32-NEXT:    movl %esp, %ebp
44; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
45; AVX-32-NEXT:    andl $-8, %esp
46; AVX-32-NEXT:    subl $32, %esp
47; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
48; AVX-32-NEXT:    vmovhps %xmm0, {{[0-9]+}}(%esp)
49; AVX-32-NEXT:    vextractf128 $1, %ymm0, %xmm0
50; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
51; AVX-32-NEXT:    vmovhps %xmm0, (%esp)
52; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
53; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
54; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
55; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
56; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
57; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
58; AVX-32-NEXT:    fldl (%esp)
59; AVX-32-NEXT:    fisttpll (%esp)
60; AVX-32-NEXT:    wait
61; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
62; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
63; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
64; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
65; AVX-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
66; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
67; AVX-32-NEXT:    vpinsrd $2, (%esp), %xmm1, %xmm1
68; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
69; AVX-32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
70; AVX-32-NEXT:    movl %ebp, %esp
71; AVX-32-NEXT:    popl %ebp
72; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
73; AVX-32-NEXT:    retl
74;
75; AVX-64-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
76; AVX-64:       # %bb.0:
77; AVX-64-NEXT:    vextractf128 $1, %ymm0, %xmm1
78; AVX-64-NEXT:    vcvttsd2si %xmm1, %rax
79; AVX-64-NEXT:    vmovq %rax, %xmm2
80; AVX-64-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
81; AVX-64-NEXT:    vcvttsd2si %xmm1, %rax
82; AVX-64-NEXT:    vmovq %rax, %xmm1
83; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
84; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
85; AVX-64-NEXT:    vmovq %rax, %xmm2
86; AVX-64-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
87; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
88; AVX-64-NEXT:    vmovq %rax, %xmm0
89; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
90; AVX-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
91; AVX-64-NEXT:    retq
92;
93; AVX512F-32-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
94; AVX512F-32:       # %bb.0:
95; AVX512F-32-NEXT:    pushl %ebp
96; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
97; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
98; AVX512F-32-NEXT:    movl %esp, %ebp
99; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
100; AVX512F-32-NEXT:    andl $-8, %esp
101; AVX512F-32-NEXT:    subl $32, %esp
102; AVX512F-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
103; AVX512F-32-NEXT:    vmovhps %xmm0, {{[0-9]+}}(%esp)
104; AVX512F-32-NEXT:    vextractf128 $1, %ymm0, %xmm0
105; AVX512F-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
106; AVX512F-32-NEXT:    vmovhps %xmm0, (%esp)
107; AVX512F-32-NEXT:    fldl {{[0-9]+}}(%esp)
108; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
109; AVX512F-32-NEXT:    fldl {{[0-9]+}}(%esp)
110; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
111; AVX512F-32-NEXT:    fldl {{[0-9]+}}(%esp)
112; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
113; AVX512F-32-NEXT:    fldl (%esp)
114; AVX512F-32-NEXT:    fisttpll (%esp)
115; AVX512F-32-NEXT:    wait
116; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
117; AVX512F-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
118; AVX512F-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
119; AVX512F-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
120; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
121; AVX512F-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
122; AVX512F-32-NEXT:    vpinsrd $2, (%esp), %xmm1, %xmm1
123; AVX512F-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
124; AVX512F-32-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
125; AVX512F-32-NEXT:    movl %ebp, %esp
126; AVX512F-32-NEXT:    popl %ebp
127; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
128; AVX512F-32-NEXT:    retl
129;
130; AVX512F-64-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
131; AVX512F-64:       # %bb.0:
132; AVX512F-64-NEXT:    vextractf128 $1, %ymm0, %xmm1
133; AVX512F-64-NEXT:    vcvttsd2si %xmm1, %rax
134; AVX512F-64-NEXT:    vmovq %rax, %xmm2
135; AVX512F-64-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
136; AVX512F-64-NEXT:    vcvttsd2si %xmm1, %rax
137; AVX512F-64-NEXT:    vmovq %rax, %xmm1
138; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
139; AVX512F-64-NEXT:    vcvttsd2si %xmm0, %rax
140; AVX512F-64-NEXT:    vmovq %rax, %xmm2
141; AVX512F-64-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
142; AVX512F-64-NEXT:    vcvttsd2si %xmm0, %rax
143; AVX512F-64-NEXT:    vmovq %rax, %xmm0
144; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
145; AVX512F-64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
146; AVX512F-64-NEXT:    retq
147;
148; AVX512VL-32-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
149; AVX512VL-32:       # %bb.0:
150; AVX512VL-32-NEXT:    pushl %ebp
151; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
152; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
153; AVX512VL-32-NEXT:    movl %esp, %ebp
154; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
155; AVX512VL-32-NEXT:    andl $-8, %esp
156; AVX512VL-32-NEXT:    subl $32, %esp
157; AVX512VL-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
158; AVX512VL-32-NEXT:    vmovhps %xmm0, {{[0-9]+}}(%esp)
159; AVX512VL-32-NEXT:    vextractf128 $1, %ymm0, %xmm0
160; AVX512VL-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
161; AVX512VL-32-NEXT:    vmovhps %xmm0, (%esp)
162; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
163; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
164; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
165; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
166; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
167; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
168; AVX512VL-32-NEXT:    fldl (%esp)
169; AVX512VL-32-NEXT:    fisttpll (%esp)
170; AVX512VL-32-NEXT:    wait
171; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
172; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
173; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
174; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
175; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
176; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
177; AVX512VL-32-NEXT:    vpinsrd $2, (%esp), %xmm1, %xmm1
178; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
179; AVX512VL-32-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
180; AVX512VL-32-NEXT:    movl %ebp, %esp
181; AVX512VL-32-NEXT:    popl %ebp
182; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
183; AVX512VL-32-NEXT:    retl
184;
185; AVX512VL-64-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
186; AVX512VL-64:       # %bb.0:
187; AVX512VL-64-NEXT:    vextractf128 $1, %ymm0, %xmm1
188; AVX512VL-64-NEXT:    vcvttsd2si %xmm1, %rax
189; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
190; AVX512VL-64-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
191; AVX512VL-64-NEXT:    vcvttsd2si %xmm1, %rax
192; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
193; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
194; AVX512VL-64-NEXT:    vcvttsd2si %xmm0, %rax
195; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
196; AVX512VL-64-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
197; AVX512VL-64-NEXT:    vcvttsd2si %xmm0, %rax
198; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
199; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
200; AVX512VL-64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
201; AVX512VL-64-NEXT:    retq
202;
203; AVX512DQ-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
204; AVX512DQ:       # %bb.0:
205; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
206; AVX512DQ-NEXT:    vcvttpd2qq %zmm0, %zmm0
207; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
208; AVX512DQ-NEXT:    ret{{[l|q]}}
209;
210; AVX512DQVL-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
211; AVX512DQVL:       # %bb.0:
212; AVX512DQVL-NEXT:    vcvttpd2qq %ymm0, %ymm0
213; AVX512DQVL-NEXT:    ret{{[l|q]}}
; IR under test - a single constrained fptosi call whose result is returned unchanged.
214  %ret = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double> %a,
215                                              metadata !"fpexcept.strict") #0
216  ret <4 x i64> %ret
217}
218
; Strict unsigned conversion <4 x double> -> <4 x i64> (constrained fptoui, fpexcept.strict).
; Lowering expected by the autogenerated assertions in this function:
;   - plain AVX - no unsigned convert instruction appears. Each element is
;     compared (vcomisd) against a constant loaded from memory (presumably
;     2^63 - the value is not visible here), either that constant or zero is
;     subtracted, a signed convert is run (x87 fisttpll on the 32-bit triple,
;     vcvttsd2si on the 64-bit one), and setae + shift + xor adjusts the top
;     bit of the result.
;   - AVX512F and AVX512VL on the 32-bit triple - the same compare/subtract
;     scheme, but the subtrahend is chosen with a zero-masked vmovsd under %k1
;     instead of a branch, followed by x87 fisttpll.
;   - AVX512F and AVX512VL on the 64-bit triple - four scalar vcvttsd2usi
;     conversions recombined with vpunpcklqdq and a 128-bit insert.
;   - AVX512DQ - vcvttpd2uqq, on zmm0 without VL and on ymm0 with VL.
; Do not hand-edit the assertion lines; regenerate them with the script named
; in the note at the top of the file.
219define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
220; AVX-32-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
221; AVX-32:       # %bb.0:
222; AVX-32-NEXT:    pushl %ebp
223; AVX-32-NEXT:    .cfi_def_cfa_offset 8
224; AVX-32-NEXT:    .cfi_offset %ebp, -8
225; AVX-32-NEXT:    movl %esp, %ebp
226; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
227; AVX-32-NEXT:    andl $-8, %esp
228; AVX-32-NEXT:    subl $32, %esp
229; AVX-32-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
230; AVX-32-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
231; AVX-32-NEXT:    vcomisd %xmm1, %xmm2
232; AVX-32-NEXT:    vmovapd %xmm1, %xmm3
233; AVX-32-NEXT:    jae .LBB1_2
234; AVX-32-NEXT:  # %bb.1:
235; AVX-32-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
236; AVX-32-NEXT:  .LBB1_2:
237; AVX-32-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
238; AVX-32-NEXT:    vmovsd %xmm2, {{[0-9]+}}(%esp)
239; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
240; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
241; AVX-32-NEXT:    wait
242; AVX-32-NEXT:    setae %al
243; AVX-32-NEXT:    movzbl %al, %eax
244; AVX-32-NEXT:    shll $31, %eax
245; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
246; AVX-32-NEXT:    vextractf128 $1, %ymm0, %xmm2
247; AVX-32-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
248; AVX-32-NEXT:    vcomisd %xmm1, %xmm3
249; AVX-32-NEXT:    vmovapd %xmm1, %xmm4
250; AVX-32-NEXT:    jae .LBB1_4
251; AVX-32-NEXT:  # %bb.3:
252; AVX-32-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
253; AVX-32-NEXT:  .LBB1_4:
254; AVX-32-NEXT:    vsubsd %xmm4, %xmm3, %xmm3
255; AVX-32-NEXT:    vmovsd %xmm3, (%esp)
256; AVX-32-NEXT:    fldl (%esp)
257; AVX-32-NEXT:    fisttpll (%esp)
258; AVX-32-NEXT:    wait
259; AVX-32-NEXT:    setae %cl
260; AVX-32-NEXT:    movzbl %cl, %ecx
261; AVX-32-NEXT:    shll $31, %ecx
262; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
263; AVX-32-NEXT:    vcomisd %xmm1, %xmm2
264; AVX-32-NEXT:    vmovapd %xmm1, %xmm3
265; AVX-32-NEXT:    jae .LBB1_6
266; AVX-32-NEXT:  # %bb.5:
267; AVX-32-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
268; AVX-32-NEXT:  .LBB1_6:
269; AVX-32-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
270; AVX-32-NEXT:    vmovsd %xmm2, {{[0-9]+}}(%esp)
271; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
272; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
273; AVX-32-NEXT:    wait
274; AVX-32-NEXT:    setae %dl
275; AVX-32-NEXT:    movzbl %dl, %edx
276; AVX-32-NEXT:    shll $31, %edx
277; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %edx
278; AVX-32-NEXT:    vcomisd %xmm1, %xmm0
279; AVX-32-NEXT:    jae .LBB1_8
280; AVX-32-NEXT:  # %bb.7:
281; AVX-32-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
282; AVX-32-NEXT:  .LBB1_8:
283; AVX-32-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
284; AVX-32-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp)
285; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
286; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
287; AVX-32-NEXT:    wait
288; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
289; AVX-32-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
290; AVX-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
291; AVX-32-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
292; AVX-32-NEXT:    setae %cl
293; AVX-32-NEXT:    movzbl %cl, %ecx
294; AVX-32-NEXT:    shll $31, %ecx
295; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
296; AVX-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
297; AVX-32-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
298; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
299; AVX-32-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
300; AVX-32-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
301; AVX-32-NEXT:    movl %ebp, %esp
302; AVX-32-NEXT:    popl %ebp
303; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
304; AVX-32-NEXT:    retl
305;
306; AVX-64-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
307; AVX-64:       # %bb.0:
308; AVX-64-NEXT:    vextractf128 $1, %ymm0, %xmm3
309; AVX-64-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
310; AVX-64-NEXT:    vcomisd %xmm1, %xmm3
311; AVX-64-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
312; AVX-64-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
313; AVX-64-NEXT:    jb .LBB1_2
314; AVX-64-NEXT:  # %bb.1:
315; AVX-64-NEXT:    vmovapd %xmm1, %xmm4
316; AVX-64-NEXT:  .LBB1_2:
317; AVX-64-NEXT:    vsubsd %xmm4, %xmm3, %xmm4
318; AVX-64-NEXT:    vcvttsd2si %xmm4, %rcx
319; AVX-64-NEXT:    setae %al
320; AVX-64-NEXT:    movzbl %al, %eax
321; AVX-64-NEXT:    shlq $63, %rax
322; AVX-64-NEXT:    xorq %rcx, %rax
323; AVX-64-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
324; AVX-64-NEXT:    vcomisd %xmm1, %xmm4
325; AVX-64-NEXT:    vxorpd %xmm5, %xmm5, %xmm5
326; AVX-64-NEXT:    jb .LBB1_4
327; AVX-64-NEXT:  # %bb.3:
328; AVX-64-NEXT:    vmovapd %xmm1, %xmm5
329; AVX-64-NEXT:  .LBB1_4:
330; AVX-64-NEXT:    vmovq %rax, %xmm3
331; AVX-64-NEXT:    vsubsd %xmm5, %xmm4, %xmm4
332; AVX-64-NEXT:    vcvttsd2si %xmm4, %rax
333; AVX-64-NEXT:    setae %cl
334; AVX-64-NEXT:    movzbl %cl, %ecx
335; AVX-64-NEXT:    shlq $63, %rcx
336; AVX-64-NEXT:    xorq %rax, %rcx
337; AVX-64-NEXT:    vmovq %rcx, %xmm4
338; AVX-64-NEXT:    vcomisd %xmm1, %xmm0
339; AVX-64-NEXT:    vxorpd %xmm5, %xmm5, %xmm5
340; AVX-64-NEXT:    jb .LBB1_6
341; AVX-64-NEXT:  # %bb.5:
342; AVX-64-NEXT:    vmovapd %xmm1, %xmm5
343; AVX-64-NEXT:  .LBB1_6:
344; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
345; AVX-64-NEXT:    vsubsd %xmm5, %xmm0, %xmm4
346; AVX-64-NEXT:    vcvttsd2si %xmm4, %rax
347; AVX-64-NEXT:    setae %cl
348; AVX-64-NEXT:    movzbl %cl, %ecx
349; AVX-64-NEXT:    shlq $63, %rcx
350; AVX-64-NEXT:    xorq %rax, %rcx
351; AVX-64-NEXT:    vmovq %rcx, %xmm4
352; AVX-64-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
353; AVX-64-NEXT:    vcomisd %xmm1, %xmm0
354; AVX-64-NEXT:    jb .LBB1_8
355; AVX-64-NEXT:  # %bb.7:
356; AVX-64-NEXT:    vmovapd %xmm1, %xmm2
357; AVX-64-NEXT:  .LBB1_8:
358; AVX-64-NEXT:    vsubsd %xmm2, %xmm0, %xmm0
359; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
360; AVX-64-NEXT:    setae %cl
361; AVX-64-NEXT:    movzbl %cl, %ecx
362; AVX-64-NEXT:    shlq $63, %rcx
363; AVX-64-NEXT:    xorq %rax, %rcx
364; AVX-64-NEXT:    vmovq %rcx, %xmm0
365; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0]
366; AVX-64-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
367; AVX-64-NEXT:    retq
368;
369; AVX512F-32-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
370; AVX512F-32:       # %bb.0:
371; AVX512F-32-NEXT:    pushl %ebp
372; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
373; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
374; AVX512F-32-NEXT:    movl %esp, %ebp
375; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
376; AVX512F-32-NEXT:    pushl %ebx
377; AVX512F-32-NEXT:    andl $-8, %esp
378; AVX512F-32-NEXT:    subl $40, %esp
379; AVX512F-32-NEXT:    .cfi_offset %ebx, -12
380; AVX512F-32-NEXT:    vextractf128 $1, %ymm0, %xmm1
381; AVX512F-32-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
382; AVX512F-32-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
383; AVX512F-32-NEXT:    xorl %eax, %eax
384; AVX512F-32-NEXT:    vcomisd %xmm3, %xmm2
385; AVX512F-32-NEXT:    setae %al
386; AVX512F-32-NEXT:    kmovw %eax, %k1
387; AVX512F-32-NEXT:    vmovsd %xmm3, %xmm3, %xmm4 {%k1} {z}
388; AVX512F-32-NEXT:    vsubsd %xmm4, %xmm2, %xmm2
389; AVX512F-32-NEXT:    vmovsd %xmm2, (%esp)
390; AVX512F-32-NEXT:    xorl %edx, %edx
391; AVX512F-32-NEXT:    vcomisd %xmm3, %xmm1
392; AVX512F-32-NEXT:    setae %dl
393; AVX512F-32-NEXT:    kmovw %edx, %k1
394; AVX512F-32-NEXT:    vmovsd %xmm3, %xmm3, %xmm2 {%k1} {z}
395; AVX512F-32-NEXT:    vsubsd %xmm2, %xmm1, %xmm1
396; AVX512F-32-NEXT:    vmovsd %xmm1, {{[0-9]+}}(%esp)
397; AVX512F-32-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
398; AVX512F-32-NEXT:    xorl %ecx, %ecx
399; AVX512F-32-NEXT:    vcomisd %xmm3, %xmm1
400; AVX512F-32-NEXT:    setae %cl
401; AVX512F-32-NEXT:    kmovw %ecx, %k1
402; AVX512F-32-NEXT:    vmovsd %xmm3, %xmm3, %xmm2 {%k1} {z}
403; AVX512F-32-NEXT:    vsubsd %xmm2, %xmm1, %xmm1
404; AVX512F-32-NEXT:    vmovsd %xmm1, {{[0-9]+}}(%esp)
405; AVX512F-32-NEXT:    xorl %ebx, %ebx
406; AVX512F-32-NEXT:    vcomisd %xmm3, %xmm0
407; AVX512F-32-NEXT:    setae %bl
408; AVX512F-32-NEXT:    kmovw %ebx, %k1
409; AVX512F-32-NEXT:    vmovsd %xmm3, %xmm3, %xmm1 {%k1} {z}
410; AVX512F-32-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
411; AVX512F-32-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp)
412; AVX512F-32-NEXT:    fldl (%esp)
413; AVX512F-32-NEXT:    fisttpll (%esp)
414; AVX512F-32-NEXT:    fldl {{[0-9]+}}(%esp)
415; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
416; AVX512F-32-NEXT:    fldl {{[0-9]+}}(%esp)
417; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
418; AVX512F-32-NEXT:    fldl {{[0-9]+}}(%esp)
419; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
420; AVX512F-32-NEXT:    wait
421; AVX512F-32-NEXT:    shll $31, %eax
422; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
423; AVX512F-32-NEXT:    shll $31, %edx
424; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %edx
425; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
426; AVX512F-32-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
427; AVX512F-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
428; AVX512F-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
429; AVX512F-32-NEXT:    shll $31, %ecx
430; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
431; AVX512F-32-NEXT:    shll $31, %ebx
432; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %ebx
433; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
434; AVX512F-32-NEXT:    vpinsrd $1, %ebx, %xmm1, %xmm1
435; AVX512F-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
436; AVX512F-32-NEXT:    vpinsrd $3, %ecx, %xmm1, %xmm1
437; AVX512F-32-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
438; AVX512F-32-NEXT:    leal -4(%ebp), %esp
439; AVX512F-32-NEXT:    popl %ebx
440; AVX512F-32-NEXT:    popl %ebp
441; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
442; AVX512F-32-NEXT:    retl
443;
444; AVX512F-64-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
445; AVX512F-64:       # %bb.0:
446; AVX512F-64-NEXT:    vextractf128 $1, %ymm0, %xmm1
447; AVX512F-64-NEXT:    vcvttsd2usi %xmm1, %rax
448; AVX512F-64-NEXT:    vmovq %rax, %xmm2
449; AVX512F-64-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
450; AVX512F-64-NEXT:    vcvttsd2usi %xmm1, %rax
451; AVX512F-64-NEXT:    vmovq %rax, %xmm1
452; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
453; AVX512F-64-NEXT:    vcvttsd2usi %xmm0, %rax
454; AVX512F-64-NEXT:    vmovq %rax, %xmm2
455; AVX512F-64-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
456; AVX512F-64-NEXT:    vcvttsd2usi %xmm0, %rax
457; AVX512F-64-NEXT:    vmovq %rax, %xmm0
458; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
459; AVX512F-64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
460; AVX512F-64-NEXT:    retq
461;
462; AVX512VL-32-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
463; AVX512VL-32:       # %bb.0:
464; AVX512VL-32-NEXT:    pushl %ebp
465; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
466; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
467; AVX512VL-32-NEXT:    movl %esp, %ebp
468; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
469; AVX512VL-32-NEXT:    pushl %ebx
470; AVX512VL-32-NEXT:    andl $-8, %esp
471; AVX512VL-32-NEXT:    subl $40, %esp
472; AVX512VL-32-NEXT:    .cfi_offset %ebx, -12
473; AVX512VL-32-NEXT:    vextractf128 $1, %ymm0, %xmm1
474; AVX512VL-32-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
475; AVX512VL-32-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
476; AVX512VL-32-NEXT:    xorl %eax, %eax
477; AVX512VL-32-NEXT:    vcomisd %xmm3, %xmm2
478; AVX512VL-32-NEXT:    setae %al
479; AVX512VL-32-NEXT:    kmovw %eax, %k1
480; AVX512VL-32-NEXT:    vmovsd %xmm3, %xmm3, %xmm4 {%k1} {z}
481; AVX512VL-32-NEXT:    vsubsd %xmm4, %xmm2, %xmm2
482; AVX512VL-32-NEXT:    vmovsd %xmm2, (%esp)
483; AVX512VL-32-NEXT:    xorl %edx, %edx
484; AVX512VL-32-NEXT:    vcomisd %xmm3, %xmm1
485; AVX512VL-32-NEXT:    setae %dl
486; AVX512VL-32-NEXT:    kmovw %edx, %k1
487; AVX512VL-32-NEXT:    vmovsd %xmm3, %xmm3, %xmm2 {%k1} {z}
488; AVX512VL-32-NEXT:    vsubsd %xmm2, %xmm1, %xmm1
489; AVX512VL-32-NEXT:    vmovsd %xmm1, {{[0-9]+}}(%esp)
490; AVX512VL-32-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
491; AVX512VL-32-NEXT:    xorl %ecx, %ecx
492; AVX512VL-32-NEXT:    vcomisd %xmm3, %xmm1
493; AVX512VL-32-NEXT:    setae %cl
494; AVX512VL-32-NEXT:    kmovw %ecx, %k1
495; AVX512VL-32-NEXT:    vmovsd %xmm3, %xmm3, %xmm2 {%k1} {z}
496; AVX512VL-32-NEXT:    vsubsd %xmm2, %xmm1, %xmm1
497; AVX512VL-32-NEXT:    vmovsd %xmm1, {{[0-9]+}}(%esp)
498; AVX512VL-32-NEXT:    xorl %ebx, %ebx
499; AVX512VL-32-NEXT:    vcomisd %xmm3, %xmm0
500; AVX512VL-32-NEXT:    setae %bl
501; AVX512VL-32-NEXT:    kmovw %ebx, %k1
502; AVX512VL-32-NEXT:    vmovsd %xmm3, %xmm3, %xmm1 {%k1} {z}
503; AVX512VL-32-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
504; AVX512VL-32-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp)
505; AVX512VL-32-NEXT:    fldl (%esp)
506; AVX512VL-32-NEXT:    fisttpll (%esp)
507; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
508; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
509; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
510; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
511; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
512; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
513; AVX512VL-32-NEXT:    wait
514; AVX512VL-32-NEXT:    shll $31, %eax
515; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
516; AVX512VL-32-NEXT:    shll $31, %edx
517; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %edx
518; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
519; AVX512VL-32-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
520; AVX512VL-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
521; AVX512VL-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
522; AVX512VL-32-NEXT:    shll $31, %ecx
523; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
524; AVX512VL-32-NEXT:    shll $31, %ebx
525; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ebx
526; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
527; AVX512VL-32-NEXT:    vpinsrd $1, %ebx, %xmm1, %xmm1
528; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
529; AVX512VL-32-NEXT:    vpinsrd $3, %ecx, %xmm1, %xmm1
530; AVX512VL-32-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
531; AVX512VL-32-NEXT:    leal -4(%ebp), %esp
532; AVX512VL-32-NEXT:    popl %ebx
533; AVX512VL-32-NEXT:    popl %ebp
534; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
535; AVX512VL-32-NEXT:    retl
536;
537; AVX512VL-64-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
538; AVX512VL-64:       # %bb.0:
539; AVX512VL-64-NEXT:    vextractf128 $1, %ymm0, %xmm1
540; AVX512VL-64-NEXT:    vcvttsd2usi %xmm1, %rax
541; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
542; AVX512VL-64-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
543; AVX512VL-64-NEXT:    vcvttsd2usi %xmm1, %rax
544; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
545; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
546; AVX512VL-64-NEXT:    vcvttsd2usi %xmm0, %rax
547; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
548; AVX512VL-64-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
549; AVX512VL-64-NEXT:    vcvttsd2usi %xmm0, %rax
550; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
551; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
552; AVX512VL-64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
553; AVX512VL-64-NEXT:    retq
554;
555; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
556; AVX512DQ:       # %bb.0:
557; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
558; AVX512DQ-NEXT:    vcvttpd2uqq %zmm0, %zmm0
559; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
560; AVX512DQ-NEXT:    ret{{[l|q]}}
561;
562; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
563; AVX512DQVL:       # %bb.0:
564; AVX512DQVL-NEXT:    vcvttpd2uqq %ymm0, %ymm0
565; AVX512DQVL-NEXT:    ret{{[l|q]}}
; IR under test - a single constrained fptoui call whose result is returned unchanged.
566  %ret = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(<4 x double> %a,
567                                              metadata !"fpexcept.strict") #0
568  ret <4 x i64> %ret
569}
570
571define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 {
572; AVX-32-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
573; AVX-32:       # %bb.0:
574; AVX-32-NEXT:    pushl %ebp
575; AVX-32-NEXT:    .cfi_def_cfa_offset 8
576; AVX-32-NEXT:    .cfi_offset %ebp, -8
577; AVX-32-NEXT:    movl %esp, %ebp
578; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
579; AVX-32-NEXT:    andl $-8, %esp
580; AVX-32-NEXT:    subl $32, %esp
581; AVX-32-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
582; AVX-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
583; AVX-32-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp)
584; AVX-32-NEXT:    vextractps $3, %xmm0, (%esp)
585; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
586; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
587; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
588; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
589; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
590; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
591; AVX-32-NEXT:    flds (%esp)
592; AVX-32-NEXT:    fisttpll (%esp)
593; AVX-32-NEXT:    wait
594; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
595; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
596; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
597; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
598; AVX-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
599; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
600; AVX-32-NEXT:    vpinsrd $2, (%esp), %xmm1, %xmm1
601; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
602; AVX-32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
603; AVX-32-NEXT:    movl %ebp, %esp
604; AVX-32-NEXT:    popl %ebp
605; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
606; AVX-32-NEXT:    retl
607;
608; AVX-64-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
609; AVX-64:       # %bb.0:
610; AVX-64-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
611; AVX-64-NEXT:    vcvttss2si %xmm1, %rax
612; AVX-64-NEXT:    vmovq %rax, %xmm1
613; AVX-64-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
614; AVX-64-NEXT:    vcvttss2si %xmm2, %rax
615; AVX-64-NEXT:    vmovq %rax, %xmm2
616; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
617; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
618; AVX-64-NEXT:    vmovq %rax, %xmm2
619; AVX-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
620; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
621; AVX-64-NEXT:    vmovq %rax, %xmm0
622; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
623; AVX-64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
624; AVX-64-NEXT:    retq
625;
626; AVX512F-32-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
627; AVX512F-32:       # %bb.0:
628; AVX512F-32-NEXT:    pushl %ebp
629; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
630; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
631; AVX512F-32-NEXT:    movl %esp, %ebp
632; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
633; AVX512F-32-NEXT:    andl $-8, %esp
634; AVX512F-32-NEXT:    subl $32, %esp
635; AVX512F-32-NEXT:    vmovd %xmm0, {{[0-9]+}}(%esp)
636; AVX512F-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
637; AVX512F-32-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp)
638; AVX512F-32-NEXT:    vextractps $3, %xmm0, (%esp)
639; AVX512F-32-NEXT:    flds {{[0-9]+}}(%esp)
640; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
641; AVX512F-32-NEXT:    flds {{[0-9]+}}(%esp)
642; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
643; AVX512F-32-NEXT:    flds {{[0-9]+}}(%esp)
644; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
645; AVX512F-32-NEXT:    flds (%esp)
646; AVX512F-32-NEXT:    fisttpll (%esp)
647; AVX512F-32-NEXT:    wait
648; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
649; AVX512F-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
650; AVX512F-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
651; AVX512F-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
652; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
653; AVX512F-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
654; AVX512F-32-NEXT:    vpinsrd $2, (%esp), %xmm1, %xmm1
655; AVX512F-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
656; AVX512F-32-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
657; AVX512F-32-NEXT:    movl %ebp, %esp
658; AVX512F-32-NEXT:    popl %ebp
659; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
660; AVX512F-32-NEXT:    retl
661;
662; AVX512F-64-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
663; AVX512F-64:       # %bb.0:
664; AVX512F-64-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
665; AVX512F-64-NEXT:    vcvttss2si %xmm1, %rax
666; AVX512F-64-NEXT:    vmovq %rax, %xmm1
667; AVX512F-64-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
668; AVX512F-64-NEXT:    vcvttss2si %xmm2, %rax
669; AVX512F-64-NEXT:    vmovq %rax, %xmm2
670; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
671; AVX512F-64-NEXT:    vcvttss2si %xmm0, %rax
672; AVX512F-64-NEXT:    vmovq %rax, %xmm2
673; AVX512F-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
674; AVX512F-64-NEXT:    vcvttss2si %xmm0, %rax
675; AVX512F-64-NEXT:    vmovq %rax, %xmm0
676; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
677; AVX512F-64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
678; AVX512F-64-NEXT:    retq
679;
680; AVX512VL-32-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
681; AVX512VL-32:       # %bb.0:
682; AVX512VL-32-NEXT:    pushl %ebp
683; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
684; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
685; AVX512VL-32-NEXT:    movl %esp, %ebp
686; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
687; AVX512VL-32-NEXT:    andl $-8, %esp
688; AVX512VL-32-NEXT:    subl $32, %esp
689; AVX512VL-32-NEXT:    vmovd %xmm0, {{[0-9]+}}(%esp)
690; AVX512VL-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
691; AVX512VL-32-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp)
692; AVX512VL-32-NEXT:    vextractps $3, %xmm0, (%esp)
693; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
694; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
695; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
696; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
697; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
698; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
699; AVX512VL-32-NEXT:    flds (%esp)
700; AVX512VL-32-NEXT:    fisttpll (%esp)
701; AVX512VL-32-NEXT:    wait
702; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
703; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
704; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
705; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
706; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
707; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
708; AVX512VL-32-NEXT:    vpinsrd $2, (%esp), %xmm1, %xmm1
709; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
710; AVX512VL-32-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
711; AVX512VL-32-NEXT:    movl %ebp, %esp
712; AVX512VL-32-NEXT:    popl %ebp
713; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
714; AVX512VL-32-NEXT:    retl
715;
716; AVX512VL-64-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
717; AVX512VL-64:       # %bb.0:
718; AVX512VL-64-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
719; AVX512VL-64-NEXT:    vcvttss2si %xmm1, %rax
720; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
721; AVX512VL-64-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
722; AVX512VL-64-NEXT:    vcvttss2si %xmm2, %rax
723; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
724; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
725; AVX512VL-64-NEXT:    vcvttss2si %xmm0, %rax
726; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
727; AVX512VL-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
728; AVX512VL-64-NEXT:    vcvttss2si %xmm0, %rax
729; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
730; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
731; AVX512VL-64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
732; AVX512VL-64-NEXT:    retq
733;
734; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
735; AVX512DQ:       # %bb.0:
736; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
737; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
738; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
739; AVX512DQ-NEXT:    ret{{[l|q]}}
740;
741; AVX512DQVL-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
742; AVX512DQVL:       # %bb.0:
743; AVX512DQVL-NEXT:    vcvttps2qq %xmm0, %ymm0
744; AVX512DQVL-NEXT:    ret{{[l|q]}}
745  %ret = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float> %a,
746                                              metadata !"fpexcept.strict") #0
747  ret <4 x i64> %ret
748}
749
; Strict-FP test case: converts <4 x float> to unsigned <4 x i64> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception semantics.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
; Expected lowering, as recorded by the assertions:
;  - i686 with +avx, +avx512f or +avx512vl: every lane is compared (vcomiss)
;    against a constant loaded from memory, conditionally reduced with vsubss,
;    and converted through x87 flds/fisttpll; the setae result is shifted into
;    bit 31 and xor'ed into the value inserted as the upper dword of the lane.
;  - x86_64 with +avx: the same compare/subtract scheme around vcvttss2si, with
;    the setae result shifted into bit 63 and xor'ed into the 64-bit result.
;  - x86_64 with +avx512f or +avx512vl: one vcvttss2usi per lane.
;  - avx512dq: a single vcvttps2uqq, on a zmm register without avx512vl and
;    directly xmm to ymm with avx512vl.
750define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
751; AVX-32-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
752; AVX-32:       # %bb.0:
753; AVX-32-NEXT:    pushl %ebp
754; AVX-32-NEXT:    .cfi_def_cfa_offset 8
755; AVX-32-NEXT:    .cfi_offset %ebp, -8
756; AVX-32-NEXT:    movl %esp, %ebp
757; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
758; AVX-32-NEXT:    andl $-8, %esp
759; AVX-32-NEXT:    subl $32, %esp
760; AVX-32-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
761; AVX-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
762; AVX-32-NEXT:    vcomiss %xmm1, %xmm2
763; AVX-32-NEXT:    vmovaps %xmm1, %xmm3
764; AVX-32-NEXT:    jae .LBB3_2
765; AVX-32-NEXT:  # %bb.1:
766; AVX-32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
767; AVX-32-NEXT:  .LBB3_2:
768; AVX-32-NEXT:    vsubss %xmm3, %xmm2, %xmm2
769; AVX-32-NEXT:    vmovss %xmm2, {{[0-9]+}}(%esp)
770; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
771; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
772; AVX-32-NEXT:    wait
773; AVX-32-NEXT:    setae %al
774; AVX-32-NEXT:    movzbl %al, %eax
775; AVX-32-NEXT:    shll $31, %eax
776; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
777; AVX-32-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
778; AVX-32-NEXT:    vcomiss %xmm1, %xmm2
779; AVX-32-NEXT:    vmovaps %xmm1, %xmm3
780; AVX-32-NEXT:    jae .LBB3_4
781; AVX-32-NEXT:  # %bb.3:
782; AVX-32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
783; AVX-32-NEXT:  .LBB3_4:
784; AVX-32-NEXT:    vsubss %xmm3, %xmm2, %xmm2
785; AVX-32-NEXT:    vmovss %xmm2, (%esp)
786; AVX-32-NEXT:    flds (%esp)
787; AVX-32-NEXT:    fisttpll (%esp)
788; AVX-32-NEXT:    wait
789; AVX-32-NEXT:    setae %cl
790; AVX-32-NEXT:    movzbl %cl, %ecx
791; AVX-32-NEXT:    shll $31, %ecx
792; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
793; AVX-32-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
794; AVX-32-NEXT:    vcomiss %xmm1, %xmm2
795; AVX-32-NEXT:    vmovaps %xmm1, %xmm3
796; AVX-32-NEXT:    jae .LBB3_6
797; AVX-32-NEXT:  # %bb.5:
798; AVX-32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
799; AVX-32-NEXT:  .LBB3_6:
800; AVX-32-NEXT:    vsubss %xmm3, %xmm2, %xmm2
801; AVX-32-NEXT:    vmovss %xmm2, {{[0-9]+}}(%esp)
802; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
803; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
804; AVX-32-NEXT:    wait
805; AVX-32-NEXT:    setae %dl
806; AVX-32-NEXT:    movzbl %dl, %edx
807; AVX-32-NEXT:    shll $31, %edx
808; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %edx
809; AVX-32-NEXT:    vcomiss %xmm1, %xmm0
810; AVX-32-NEXT:    jae .LBB3_8
811; AVX-32-NEXT:  # %bb.7:
812; AVX-32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
813; AVX-32-NEXT:  .LBB3_8:
814; AVX-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
815; AVX-32-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
816; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
817; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
818; AVX-32-NEXT:    wait
819; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
820; AVX-32-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
821; AVX-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
822; AVX-32-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
823; AVX-32-NEXT:    setae %cl
824; AVX-32-NEXT:    movzbl %cl, %ecx
825; AVX-32-NEXT:    shll $31, %ecx
826; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
827; AVX-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
828; AVX-32-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
829; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
830; AVX-32-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
831; AVX-32-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
832; AVX-32-NEXT:    movl %ebp, %esp
833; AVX-32-NEXT:    popl %ebp
834; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
835; AVX-32-NEXT:    retl
836;
837; AVX-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
838; AVX-64:       # %bb.0:
839; AVX-64-NEXT:    vpermilps {{.*#+}} xmm3 = xmm0[3,3,3,3]
840; AVX-64-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
841; AVX-64-NEXT:    vcomiss %xmm1, %xmm3
842; AVX-64-NEXT:    vxorps %xmm2, %xmm2, %xmm2
843; AVX-64-NEXT:    vxorps %xmm4, %xmm4, %xmm4
844; AVX-64-NEXT:    jb .LBB3_2
845; AVX-64-NEXT:  # %bb.1:
846; AVX-64-NEXT:    vmovaps %xmm1, %xmm4
847; AVX-64-NEXT:  .LBB3_2:
848; AVX-64-NEXT:    vsubss %xmm4, %xmm3, %xmm3
849; AVX-64-NEXT:    vcvttss2si %xmm3, %rcx
850; AVX-64-NEXT:    setae %al
851; AVX-64-NEXT:    movzbl %al, %eax
852; AVX-64-NEXT:    shlq $63, %rax
853; AVX-64-NEXT:    xorq %rcx, %rax
854; AVX-64-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
855; AVX-64-NEXT:    vcomiss %xmm1, %xmm4
856; AVX-64-NEXT:    vxorps %xmm5, %xmm5, %xmm5
857; AVX-64-NEXT:    jb .LBB3_4
858; AVX-64-NEXT:  # %bb.3:
859; AVX-64-NEXT:    vmovaps %xmm1, %xmm5
860; AVX-64-NEXT:  .LBB3_4:
861; AVX-64-NEXT:    vmovq %rax, %xmm3
862; AVX-64-NEXT:    vsubss %xmm5, %xmm4, %xmm4
863; AVX-64-NEXT:    vcvttss2si %xmm4, %rax
864; AVX-64-NEXT:    setae %cl
865; AVX-64-NEXT:    movzbl %cl, %ecx
866; AVX-64-NEXT:    shlq $63, %rcx
867; AVX-64-NEXT:    xorq %rax, %rcx
868; AVX-64-NEXT:    vmovq %rcx, %xmm4
869; AVX-64-NEXT:    vcomiss %xmm1, %xmm0
870; AVX-64-NEXT:    vxorps %xmm5, %xmm5, %xmm5
871; AVX-64-NEXT:    jb .LBB3_6
872; AVX-64-NEXT:  # %bb.5:
873; AVX-64-NEXT:    vmovaps %xmm1, %xmm5
874; AVX-64-NEXT:  .LBB3_6:
875; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
876; AVX-64-NEXT:    vsubss %xmm5, %xmm0, %xmm4
877; AVX-64-NEXT:    vcvttss2si %xmm4, %rax
878; AVX-64-NEXT:    setae %cl
879; AVX-64-NEXT:    movzbl %cl, %ecx
880; AVX-64-NEXT:    shlq $63, %rcx
881; AVX-64-NEXT:    xorq %rax, %rcx
882; AVX-64-NEXT:    vmovq %rcx, %xmm4
883; AVX-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
884; AVX-64-NEXT:    vcomiss %xmm1, %xmm0
885; AVX-64-NEXT:    jb .LBB3_8
886; AVX-64-NEXT:  # %bb.7:
887; AVX-64-NEXT:    vmovaps %xmm1, %xmm2
888; AVX-64-NEXT:  .LBB3_8:
889; AVX-64-NEXT:    vsubss %xmm2, %xmm0, %xmm0
890; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
891; AVX-64-NEXT:    setae %cl
892; AVX-64-NEXT:    movzbl %cl, %ecx
893; AVX-64-NEXT:    shlq $63, %rcx
894; AVX-64-NEXT:    xorq %rax, %rcx
895; AVX-64-NEXT:    vmovq %rcx, %xmm0
896; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0]
897; AVX-64-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
898; AVX-64-NEXT:    retq
899;
900; AVX512F-32-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
901; AVX512F-32:       # %bb.0:
902; AVX512F-32-NEXT:    pushl %ebp
903; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
904; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
905; AVX512F-32-NEXT:    movl %esp, %ebp
906; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
907; AVX512F-32-NEXT:    pushl %ebx
908; AVX512F-32-NEXT:    andl $-8, %esp
909; AVX512F-32-NEXT:    subl $40, %esp
910; AVX512F-32-NEXT:    .cfi_offset %ebx, -12
911; AVX512F-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
912; AVX512F-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
913; AVX512F-32-NEXT:    xorl %eax, %eax
914; AVX512F-32-NEXT:    vcomiss %xmm2, %xmm1
915; AVX512F-32-NEXT:    setae %al
916; AVX512F-32-NEXT:    kmovw %eax, %k1
917; AVX512F-32-NEXT:    vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
918; AVX512F-32-NEXT:    vsubss %xmm3, %xmm1, %xmm1
919; AVX512F-32-NEXT:    vmovss %xmm1, (%esp)
920; AVX512F-32-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
921; AVX512F-32-NEXT:    xorl %edx, %edx
922; AVX512F-32-NEXT:    vcomiss %xmm2, %xmm1
923; AVX512F-32-NEXT:    setae %dl
924; AVX512F-32-NEXT:    kmovw %edx, %k1
925; AVX512F-32-NEXT:    vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
926; AVX512F-32-NEXT:    vsubss %xmm3, %xmm1, %xmm1
927; AVX512F-32-NEXT:    vmovss %xmm1, {{[0-9]+}}(%esp)
928; AVX512F-32-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
929; AVX512F-32-NEXT:    xorl %ecx, %ecx
930; AVX512F-32-NEXT:    vcomiss %xmm2, %xmm1
931; AVX512F-32-NEXT:    setae %cl
932; AVX512F-32-NEXT:    kmovw %ecx, %k1
933; AVX512F-32-NEXT:    vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
934; AVX512F-32-NEXT:    vsubss %xmm3, %xmm1, %xmm1
935; AVX512F-32-NEXT:    vmovss %xmm1, {{[0-9]+}}(%esp)
936; AVX512F-32-NEXT:    xorl %ebx, %ebx
937; AVX512F-32-NEXT:    vcomiss %xmm2, %xmm0
938; AVX512F-32-NEXT:    setae %bl
939; AVX512F-32-NEXT:    kmovw %ebx, %k1
940; AVX512F-32-NEXT:    vmovss %xmm2, %xmm2, %xmm1 {%k1} {z}
941; AVX512F-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
942; AVX512F-32-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
943; AVX512F-32-NEXT:    flds (%esp)
944; AVX512F-32-NEXT:    fisttpll (%esp)
945; AVX512F-32-NEXT:    flds {{[0-9]+}}(%esp)
946; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
947; AVX512F-32-NEXT:    flds {{[0-9]+}}(%esp)
948; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
949; AVX512F-32-NEXT:    flds {{[0-9]+}}(%esp)
950; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
951; AVX512F-32-NEXT:    wait
952; AVX512F-32-NEXT:    shll $31, %eax
953; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
954; AVX512F-32-NEXT:    shll $31, %edx
955; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %edx
956; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
957; AVX512F-32-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
958; AVX512F-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
959; AVX512F-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
960; AVX512F-32-NEXT:    shll $31, %ecx
961; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
962; AVX512F-32-NEXT:    shll $31, %ebx
963; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %ebx
964; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
965; AVX512F-32-NEXT:    vpinsrd $1, %ebx, %xmm1, %xmm1
966; AVX512F-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
967; AVX512F-32-NEXT:    vpinsrd $3, %ecx, %xmm1, %xmm1
968; AVX512F-32-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
969; AVX512F-32-NEXT:    leal -4(%ebp), %esp
970; AVX512F-32-NEXT:    popl %ebx
971; AVX512F-32-NEXT:    popl %ebp
972; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
973; AVX512F-32-NEXT:    retl
974;
975; AVX512F-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
976; AVX512F-64:       # %bb.0:
977; AVX512F-64-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
978; AVX512F-64-NEXT:    vcvttss2usi %xmm1, %rax
979; AVX512F-64-NEXT:    vmovq %rax, %xmm1
980; AVX512F-64-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
981; AVX512F-64-NEXT:    vcvttss2usi %xmm2, %rax
982; AVX512F-64-NEXT:    vmovq %rax, %xmm2
983; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
984; AVX512F-64-NEXT:    vcvttss2usi %xmm0, %rax
985; AVX512F-64-NEXT:    vmovq %rax, %xmm2
986; AVX512F-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
987; AVX512F-64-NEXT:    vcvttss2usi %xmm0, %rax
988; AVX512F-64-NEXT:    vmovq %rax, %xmm0
989; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
990; AVX512F-64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
991; AVX512F-64-NEXT:    retq
992;
993; AVX512VL-32-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
994; AVX512VL-32:       # %bb.0:
995; AVX512VL-32-NEXT:    pushl %ebp
996; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
997; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
998; AVX512VL-32-NEXT:    movl %esp, %ebp
999; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
1000; AVX512VL-32-NEXT:    pushl %ebx
1001; AVX512VL-32-NEXT:    andl $-8, %esp
1002; AVX512VL-32-NEXT:    subl $40, %esp
1003; AVX512VL-32-NEXT:    .cfi_offset %ebx, -12
1004; AVX512VL-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1005; AVX512VL-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1006; AVX512VL-32-NEXT:    xorl %eax, %eax
1007; AVX512VL-32-NEXT:    vcomiss %xmm2, %xmm1
1008; AVX512VL-32-NEXT:    setae %al
1009; AVX512VL-32-NEXT:    kmovw %eax, %k1
1010; AVX512VL-32-NEXT:    vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
1011; AVX512VL-32-NEXT:    vsubss %xmm3, %xmm1, %xmm1
1012; AVX512VL-32-NEXT:    vmovss %xmm1, (%esp)
1013; AVX512VL-32-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1014; AVX512VL-32-NEXT:    xorl %edx, %edx
1015; AVX512VL-32-NEXT:    vcomiss %xmm2, %xmm1
1016; AVX512VL-32-NEXT:    setae %dl
1017; AVX512VL-32-NEXT:    kmovw %edx, %k1
1018; AVX512VL-32-NEXT:    vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
1019; AVX512VL-32-NEXT:    vsubss %xmm3, %xmm1, %xmm1
1020; AVX512VL-32-NEXT:    vmovss %xmm1, {{[0-9]+}}(%esp)
1021; AVX512VL-32-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1022; AVX512VL-32-NEXT:    xorl %ecx, %ecx
1023; AVX512VL-32-NEXT:    vcomiss %xmm2, %xmm1
1024; AVX512VL-32-NEXT:    setae %cl
1025; AVX512VL-32-NEXT:    kmovw %ecx, %k1
1026; AVX512VL-32-NEXT:    vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
1027; AVX512VL-32-NEXT:    vsubss %xmm3, %xmm1, %xmm1
1028; AVX512VL-32-NEXT:    vmovss %xmm1, {{[0-9]+}}(%esp)
1029; AVX512VL-32-NEXT:    xorl %ebx, %ebx
1030; AVX512VL-32-NEXT:    vcomiss %xmm2, %xmm0
1031; AVX512VL-32-NEXT:    setae %bl
1032; AVX512VL-32-NEXT:    kmovw %ebx, %k1
1033; AVX512VL-32-NEXT:    vmovss %xmm2, %xmm2, %xmm1 {%k1} {z}
1034; AVX512VL-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
1035; AVX512VL-32-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
1036; AVX512VL-32-NEXT:    flds (%esp)
1037; AVX512VL-32-NEXT:    fisttpll (%esp)
1038; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
1039; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
1040; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
1041; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
1042; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
1043; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
1044; AVX512VL-32-NEXT:    wait
1045; AVX512VL-32-NEXT:    shll $31, %eax
1046; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
1047; AVX512VL-32-NEXT:    shll $31, %edx
1048; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %edx
1049; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1050; AVX512VL-32-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
1051; AVX512VL-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
1052; AVX512VL-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
1053; AVX512VL-32-NEXT:    shll $31, %ecx
1054; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
1055; AVX512VL-32-NEXT:    shll $31, %ebx
1056; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ebx
1057; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
1058; AVX512VL-32-NEXT:    vpinsrd $1, %ebx, %xmm1, %xmm1
1059; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
1060; AVX512VL-32-NEXT:    vpinsrd $3, %ecx, %xmm1, %xmm1
1061; AVX512VL-32-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
1062; AVX512VL-32-NEXT:    leal -4(%ebp), %esp
1063; AVX512VL-32-NEXT:    popl %ebx
1064; AVX512VL-32-NEXT:    popl %ebp
1065; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
1066; AVX512VL-32-NEXT:    retl
1067;
1068; AVX512VL-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
1069; AVX512VL-64:       # %bb.0:
1070; AVX512VL-64-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
1071; AVX512VL-64-NEXT:    vcvttss2usi %xmm1, %rax
1072; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
1073; AVX512VL-64-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1074; AVX512VL-64-NEXT:    vcvttss2usi %xmm2, %rax
1075; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
1076; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
1077; AVX512VL-64-NEXT:    vcvttss2usi %xmm0, %rax
1078; AVX512VL-64-NEXT:    vmovq %rax, %xmm2
1079; AVX512VL-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
1080; AVX512VL-64-NEXT:    vcvttss2usi %xmm0, %rax
1081; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
1082; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
1083; AVX512VL-64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1084; AVX512VL-64-NEXT:    retq
1085;
1086; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
1087; AVX512DQ:       # %bb.0:
1088; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
1089; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
1090; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1091; AVX512DQ-NEXT:    ret{{[l|q]}}
1092;
1093; AVX512DQVL-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
1094; AVX512DQVL:       # %bb.0:
1095; AVX512DQVL-NEXT:    vcvttps2uqq %xmm0, %ymm0
1096; AVX512DQVL-NEXT:    ret{{[l|q]}}
1097  %ret = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float> %a,
1098                                              metadata !"fpexcept.strict") #0
1099  ret <4 x i64> %ret
1100}
1101
; Strict-FP test case: converts <4 x double> to signed <4 x i32> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception semantics.
; All run lines share one expectation: a single vcvttpd2dq from ymm to xmm,
; followed by vzeroupper.
1102define <4 x i32> @strict_vector_fptosi_v4f64_to_v4i32(<4 x double> %a) #0 {
1103; CHECK-LABEL: strict_vector_fptosi_v4f64_to_v4i32:
1104; CHECK:       # %bb.0:
1105; CHECK-NEXT:    vcvttpd2dq %ymm0, %xmm0
1106; CHECK-NEXT:    vzeroupper
1107; CHECK-NEXT:    ret{{[l|q]}}
1108  %ret = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(<4 x double> %a,
1109                                              metadata !"fpexcept.strict") #0
1110  ret <4 x i32> %ret
1111}
1112
; Strict-FP test case: converts <4 x double> to unsigned <4 x i32> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception semantics.
; Expected lowering, as recorded by the assertions:
;  - plain +avx: lanes are compared against 2.147483648E+9 (vcmpltpd), the
;    blended value is subtracted, the result is converted with the signed
;    vcvttpd2dq, and the blended 2147483648 mask is xor'ed back in.
;  - avx512f / avx512dq without avx512vl: the input is widened to zmm and
;    converted with vcvttpd2udq.
;  - with avx512vl: a single vcvttpd2udq from ymm to xmm.
1113define <4 x i32> @strict_vector_fptoui_v4f64_to_v4i32(<4 x double> %a) #0 {
1114; AVX-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
1115; AVX:       # %bb.0:
1116; AVX-NEXT:    vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
1117; AVX-NEXT:    vcmpltpd %ymm1, %ymm0, %ymm2
1118; AVX-NEXT:    vextractf128 $1, %ymm2, %xmm3
1119; AVX-NEXT:    vshufps {{.*#+}} xmm3 = xmm2[0,2],xmm3[0,2]
1120; AVX-NEXT:    vxorps %xmm4, %xmm4, %xmm4
1121; AVX-NEXT:    vmovaps {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
1122; AVX-NEXT:    vblendvps %xmm3, %xmm4, %xmm5, %xmm3
1123; AVX-NEXT:    vxorps %xmm4, %xmm4, %xmm4
1124; AVX-NEXT:    vblendvpd %ymm2, %ymm4, %ymm1, %ymm1
1125; AVX-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
1126; AVX-NEXT:    vcvttpd2dq %ymm0, %xmm0
1127; AVX-NEXT:    vxorpd %xmm3, %xmm0, %xmm0
1128; AVX-NEXT:    vzeroupper
1129; AVX-NEXT:    ret{{[l|q]}}
1130;
1131; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
1132; AVX512F:       # %bb.0:
1133; AVX512F-NEXT:    vmovaps %ymm0, %ymm0
1134; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
1135; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1136; AVX512F-NEXT:    vzeroupper
1137; AVX512F-NEXT:    ret{{[l|q]}}
1138;
1139; AVX512VL-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
1140; AVX512VL:       # %bb.0:
1141; AVX512VL-NEXT:    vcvttpd2udq %ymm0, %xmm0
1142; AVX512VL-NEXT:    vzeroupper
1143; AVX512VL-NEXT:    ret{{[l|q]}}
1144;
1145; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
1146; AVX512DQ:       # %bb.0:
1147; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
1148; AVX512DQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
1149; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1150; AVX512DQ-NEXT:    vzeroupper
1151; AVX512DQ-NEXT:    ret{{[l|q]}}
1152;
1153; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
1154; AVX512DQVL:       # %bb.0:
1155; AVX512DQVL-NEXT:    vcvttpd2udq %ymm0, %xmm0
1156; AVX512DQVL-NEXT:    vzeroupper
1157; AVX512DQVL-NEXT:    ret{{[l|q]}}
1158  %ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double> %a,
1159                                              metadata !"fpexcept.strict") #0
1160  ret <4 x i32> %ret
1161}
1162
; Strict-FP test case: converts <4 x double> to signed <4 x i16> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception semantics.
; All run lines share one expectation: vcvttpd2dq to 32-bit lanes, then a
; signed-saturating pack (vpackssdw) down to 16-bit lanes.
1163define <4 x i16> @strict_vector_fptosi_v4f64_to_v4i16(<4 x double> %a) #0 {
1164; CHECK-LABEL: strict_vector_fptosi_v4f64_to_v4i16:
1165; CHECK:       # %bb.0:
1166; CHECK-NEXT:    vcvttpd2dq %ymm0, %xmm0
1167; CHECK-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1168; CHECK-NEXT:    vzeroupper
1169; CHECK-NEXT:    ret{{[l|q]}}
1170  %ret = call <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f64(<4 x double> %a,
1171                                              metadata !"fpexcept.strict") #0
1172  ret <4 x i16> %ret
1173}
1174
; Strict-FP test case: converts <4 x double> to unsigned <4 x i16> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception semantics.
; All run lines share one expectation: the signed vcvttpd2dq to 32-bit lanes,
; then an unsigned-saturating pack (vpackusdw) down to 16-bit lanes.
1175define <4 x i16> @strict_vector_fptoui_v4f64_to_v4i16(<4 x double> %a) #0 {
1176; CHECK-LABEL: strict_vector_fptoui_v4f64_to_v4i16:
1177; CHECK:       # %bb.0:
1178; CHECK-NEXT:    vcvttpd2dq %ymm0, %xmm0
1179; CHECK-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
1180; CHECK-NEXT:    vzeroupper
1181; CHECK-NEXT:    ret{{[l|q]}}
1182  %ret = call <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f64(<4 x double> %a,
1183                                              metadata !"fpexcept.strict") #0
1184  ret <4 x i16> %ret
1185}
1186
; Strict-FP test case: converts <4 x double> to signed <4 x i8> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception semantics.
; Every configuration starts with vcvttpd2dq to 32-bit lanes; the narrowing to
; bytes differs:
;  - without avx512vl: two signed-saturating packs (vpackssdw, vpacksswb).
;  - with avx512vl: a single vpmovdb.
1187define <4 x i8> @strict_vector_fptosi_v4f64_to_v4i8(<4 x double> %a) #0 {
1188; AVX-LABEL: strict_vector_fptosi_v4f64_to_v4i8:
1189; AVX:       # %bb.0:
1190; AVX-NEXT:    vcvttpd2dq %ymm0, %xmm0
1191; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1192; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
1193; AVX-NEXT:    vzeroupper
1194; AVX-NEXT:    ret{{[l|q]}}
1195;
1196; AVX512F-LABEL: strict_vector_fptosi_v4f64_to_v4i8:
1197; AVX512F:       # %bb.0:
1198; AVX512F-NEXT:    vcvttpd2dq %ymm0, %xmm0
1199; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1200; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
1201; AVX512F-NEXT:    vzeroupper
1202; AVX512F-NEXT:    ret{{[l|q]}}
1203;
1204; AVX512VL-LABEL: strict_vector_fptosi_v4f64_to_v4i8:
1205; AVX512VL:       # %bb.0:
1206; AVX512VL-NEXT:    vcvttpd2dq %ymm0, %xmm0
1207; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
1208; AVX512VL-NEXT:    vzeroupper
1209; AVX512VL-NEXT:    ret{{[l|q]}}
1210;
1211; AVX512DQ-LABEL: strict_vector_fptosi_v4f64_to_v4i8:
1212; AVX512DQ:       # %bb.0:
1213; AVX512DQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
1214; AVX512DQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
1215; AVX512DQ-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
1216; AVX512DQ-NEXT:    vzeroupper
1217; AVX512DQ-NEXT:    ret{{[l|q]}}
1218;
1219; AVX512DQVL-LABEL: strict_vector_fptosi_v4f64_to_v4i8:
1220; AVX512DQVL:       # %bb.0:
1221; AVX512DQVL-NEXT:    vcvttpd2dq %ymm0, %xmm0
1222; AVX512DQVL-NEXT:    vpmovdb %xmm0, %xmm0
1223; AVX512DQVL-NEXT:    vzeroupper
1224; AVX512DQVL-NEXT:    ret{{[l|q]}}
1225  %ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f64(<4 x double> %a,
1226                                              metadata !"fpexcept.strict") #0
1227  ret <4 x i8> %ret
1228}
1229
; Strict-FP test case: converts <4 x double> to unsigned <4 x i8> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception semantics.
; Every configuration starts with the signed vcvttpd2dq to 32-bit lanes; the
; narrowing to bytes differs:
;  - without avx512vl: two unsigned-saturating packs (vpackusdw, vpackuswb).
;  - with avx512vl: a single vpmovdb.
1230define <4 x i8> @strict_vector_fptoui_v4f64_to_v4i8(<4 x double> %a) #0 {
1231; AVX-LABEL: strict_vector_fptoui_v4f64_to_v4i8:
1232; AVX:       # %bb.0:
1233; AVX-NEXT:    vcvttpd2dq %ymm0, %xmm0
1234; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
1235; AVX-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
1236; AVX-NEXT:    vzeroupper
1237; AVX-NEXT:    ret{{[l|q]}}
1238;
1239; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i8:
1240; AVX512F:       # %bb.0:
1241; AVX512F-NEXT:    vcvttpd2dq %ymm0, %xmm0
1242; AVX512F-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
1243; AVX512F-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
1244; AVX512F-NEXT:    vzeroupper
1245; AVX512F-NEXT:    ret{{[l|q]}}
1246;
1247; AVX512VL-LABEL: strict_vector_fptoui_v4f64_to_v4i8:
1248; AVX512VL:       # %bb.0:
1249; AVX512VL-NEXT:    vcvttpd2dq %ymm0, %xmm0
1250; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
1251; AVX512VL-NEXT:    vzeroupper
1252; AVX512VL-NEXT:    ret{{[l|q]}}
1253;
1254; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i8:
1255; AVX512DQ:       # %bb.0:
1256; AVX512DQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
1257; AVX512DQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
1258; AVX512DQ-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
1259; AVX512DQ-NEXT:    vzeroupper
1260; AVX512DQ-NEXT:    ret{{[l|q]}}
1261;
1262; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i8:
1263; AVX512DQVL:       # %bb.0:
1264; AVX512DQVL-NEXT:    vcvttpd2dq %ymm0, %xmm0
1265; AVX512DQVL-NEXT:    vpmovdb %xmm0, %xmm0
1266; AVX512DQVL-NEXT:    vzeroupper
1267; AVX512DQVL-NEXT:    ret{{[l|q]}}
1268  %ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f64(<4 x double> %a,
1269                                              metadata !"fpexcept.strict") #0
1270  ret <4 x i8> %ret
1271}
1272
; Strict-FP test case: converts <4 x double> to signed <4 x i1> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception semantics.
; Every configuration starts with vcvttpd2dq to 32-bit lanes:
;  - plain +avx: nothing further is expected.
;  - avx512f / avx512vl: the result goes through a mask register (vptestmd)
;    and is re-materialized as a vector under that mask.
;  - avx512dq: the mask round trip uses vpmovd2m / vpmovm2d instead.
; Without avx512vl the mask operations are expected on zmm registers.
1273define <4 x i1> @strict_vector_fptosi_v4f64_to_v4i1(<4 x double> %a) #0 {
1274; AVX-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
1275; AVX:       # %bb.0:
1276; AVX-NEXT:    vcvttpd2dq %ymm0, %xmm0
1277; AVX-NEXT:    vzeroupper
1278; AVX-NEXT:    ret{{[l|q]}}
1279;
1280; AVX512F-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
1281; AVX512F:       # %bb.0:
1282; AVX512F-NEXT:    vcvttpd2dq %ymm0, %xmm0
1283; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
1284; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1285; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1286; AVX512F-NEXT:    vzeroupper
1287; AVX512F-NEXT:    ret{{[l|q]}}
1288;
1289; AVX512VL-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
1290; AVX512VL:       # %bb.0:
1291; AVX512VL-NEXT:    vcvttpd2dq %ymm0, %xmm0
1292; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
1293; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
1294; AVX512VL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
1295; AVX512VL-NEXT:    vzeroupper
1296; AVX512VL-NEXT:    ret{{[l|q]}}
1297;
1298; AVX512DQ-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
1299; AVX512DQ:       # %bb.0:
1300; AVX512DQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
1301; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
1302; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
1303; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1304; AVX512DQ-NEXT:    vzeroupper
1305; AVX512DQ-NEXT:    ret{{[l|q]}}
1306;
1307; AVX512DQVL-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
1308; AVX512DQVL:       # %bb.0:
1309; AVX512DQVL-NEXT:    vcvttpd2dq %ymm0, %xmm0
1310; AVX512DQVL-NEXT:    vpmovd2m %xmm0, %k0
1311; AVX512DQVL-NEXT:    vpmovm2d %k0, %xmm0
1312; AVX512DQVL-NEXT:    vzeroupper
1313; AVX512DQVL-NEXT:    ret{{[l|q]}}
1314  %ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f64(<4 x double> %a,
1315                                              metadata !"fpexcept.strict") #0
1316  ret <4 x i1> %ret
1317}
1318
; Strict-FP test case: converts <4 x double> to unsigned <4 x i1> through the
; constrained fptoui intrinsic with "fpexcept.strict" exception semantics.
; Every configuration starts with the signed vcvttpd2dq to 32-bit lanes:
;  - plain +avx: nothing further is expected.
;  - all avx512 configurations: bit 0 of each lane is first moved to the sign
;    position (vpslld $31), then the value goes through a mask register
;    (vptestmd, or vpmovd2m / vpmovm2d with avx512dq).
; Without avx512vl the mask operations are expected on zmm registers.
1319define <4 x i1> @strict_vector_fptoui_v4f64_to_v4i1(<4 x double> %a) #0 {
1320; AVX-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
1321; AVX:       # %bb.0:
1322; AVX-NEXT:    vcvttpd2dq %ymm0, %xmm0
1323; AVX-NEXT:    vzeroupper
1324; AVX-NEXT:    ret{{[l|q]}}
1325;
1326; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
1327; AVX512F:       # %bb.0:
1328; AVX512F-NEXT:    vcvttpd2dq %ymm0, %xmm0
1329; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
1330; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
1331; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1332; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1333; AVX512F-NEXT:    vzeroupper
1334; AVX512F-NEXT:    ret{{[l|q]}}
1335;
1336; AVX512VL-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
1337; AVX512VL:       # %bb.0:
1338; AVX512VL-NEXT:    vcvttpd2dq %ymm0, %xmm0
1339; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
1340; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
1341; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
1342; AVX512VL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
1343; AVX512VL-NEXT:    vzeroupper
1344; AVX512VL-NEXT:    ret{{[l|q]}}
1345;
1346; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
1347; AVX512DQ:       # %bb.0:
1348; AVX512DQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
1349; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
1350; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
1351; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
1352; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1353; AVX512DQ-NEXT:    vzeroupper
1354; AVX512DQ-NEXT:    ret{{[l|q]}}
1355;
1356; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
1357; AVX512DQVL:       # %bb.0:
1358; AVX512DQVL-NEXT:    vcvttpd2dq %ymm0, %xmm0
1359; AVX512DQVL-NEXT:    vpslld $31, %xmm0, %xmm0
1360; AVX512DQVL-NEXT:    vpmovd2m %xmm0, %k0
1361; AVX512DQVL-NEXT:    vpmovm2d %k0, %xmm0
1362; AVX512DQVL-NEXT:    vzeroupper
1363; AVX512DQVL-NEXT:    ret{{[l|q]}}
1364  %ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f64(<4 x double> %a,
1365                                              metadata !"fpexcept.strict") #0
1366  ret <4 x i1> %ret
1367}
1368
; Strict-FP test case: converts <8 x float> to signed <8 x i32> through the
; constrained fptosi intrinsic with "fpexcept.strict" exception semantics.
; All run lines share one expectation: a single vcvttps2dq on ymm registers.
1369define <8 x i32> @strict_vector_fptosi_v8f32_to_v8i32(<8 x float> %a) #0 {
1370; CHECK-LABEL: strict_vector_fptosi_v8f32_to_v8i32:
1371; CHECK:       # %bb.0:
1372; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
1373; CHECK-NEXT:    ret{{[l|q]}}
1374  %ret = call <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f32(<8 x float> %a,
1375                                              metadata !"fpexcept.strict") #0
1376  ret <8 x i32> %ret
1377}
1378
; Strict-FP unsigned conversion of <8 x float> to <8 x i32> through the
; constrained fptoui intrinsic, called with "fpexcept.strict" metadata.
; Expected code per configuration, as recorded in the assertions below:
;  * plain AVX uses only the signed vcvttps2dq: lanes are compared against
;    2.14748365E+9, lanes below that threshold get zero blended in for both
;    the subtrahend and the fix-up word, the subtrahend is subtracted, the
;    result is converted, and the fix-up (2147483648 or zero) is xor-ed in.
;  * AVX512F and AVX512DQ without VL copy ymm0 onto itself with vmovaps and
;    run vcvttps2udq on the full zmm register, keeping only the ymm half
;    (presumably the vmovaps clears the upper zmm bits so the unused lanes
;    cannot raise exceptions - NOTE(review): confirm against the lowering).
;  * the VL configurations convert directly with a ymm-wide vcvttps2udq.
; The assertions are autogenerated; regenerate them rather than hand-editing.
1379define <8 x i32> @strict_vector_fptoui_v8f32_to_v8i32(<8 x float> %a) #0 {
1380; AVX-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
1381; AVX:       # %bb.0:
1382; AVX-NEXT:    vmovaps {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
1383; AVX-NEXT:    vcmpltps %ymm1, %ymm0, %ymm2
1384; AVX-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1385; AVX-NEXT:    vmovaps {{.*#+}} ymm4 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
1386; AVX-NEXT:    vblendvps %ymm2, %ymm3, %ymm4, %ymm4
1387; AVX-NEXT:    vblendvps %ymm2, %ymm3, %ymm1, %ymm1
1388; AVX-NEXT:    vsubps %ymm1, %ymm0, %ymm0
1389; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
1390; AVX-NEXT:    vxorps %ymm4, %ymm0, %ymm0
1391; AVX-NEXT:    ret{{[l|q]}}
1392;
1393; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
1394; AVX512F:       # %bb.0:
1395; AVX512F-NEXT:    vmovaps %ymm0, %ymm0
1396; AVX512F-NEXT:    vcvttps2udq %zmm0, %zmm0
1397; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1398; AVX512F-NEXT:    ret{{[l|q]}}
1399;
1400; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
1401; AVX512VL:       # %bb.0:
1402; AVX512VL-NEXT:    vcvttps2udq %ymm0, %ymm0
1403; AVX512VL-NEXT:    ret{{[l|q]}}
1404;
1405; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
1406; AVX512DQ:       # %bb.0:
1407; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
1408; AVX512DQ-NEXT:    vcvttps2udq %zmm0, %zmm0
1409; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1410; AVX512DQ-NEXT:    ret{{[l|q]}}
1411;
1412; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
1413; AVX512DQVL:       # %bb.0:
1414; AVX512DQVL-NEXT:    vcvttps2udq %ymm0, %ymm0
1415; AVX512DQVL-NEXT:    ret{{[l|q]}}
1416  %ret = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f32(<8 x float> %a,
1417                                              metadata !"fpexcept.strict") #0
1418  ret <8 x i32> %ret
1419}
1420
; Strict-FP signed conversion of <8 x float> to <8 x i16> through the
; constrained fptosi intrinsic, called with "fpexcept.strict" metadata.
; Every configuration first converts to 32-bit lanes with vcvttps2dq and
; then narrows:
;  * plain AVX extracts the upper 128 bits and packs both halves with the
;    signed-saturating vpackssdw.
;  * AVX512F and AVX512DQ without VL narrow with vpmovdw on the zmm register
;    and keep the xmm part of the result.
;  * the VL configurations narrow with vpmovdw from ymm to xmm.
; All variants end with vzeroupper because the result is returned in xmm0.
1421define <8 x i16> @strict_vector_fptosi_v8f32_to_v8i16(<8 x float> %a) #0 {
1422; AVX-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
1423; AVX:       # %bb.0:
1424; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
1425; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
1426; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1427; AVX-NEXT:    vzeroupper
1428; AVX-NEXT:    ret{{[l|q]}}
1429;
1430; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
1431; AVX512F:       # %bb.0:
1432; AVX512F-NEXT:    vcvttps2dq %ymm0, %ymm0
1433; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
1434; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1435; AVX512F-NEXT:    vzeroupper
1436; AVX512F-NEXT:    ret{{[l|q]}}
1437;
1438; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
1439; AVX512VL:       # %bb.0:
1440; AVX512VL-NEXT:    vcvttps2dq %ymm0, %ymm0
1441; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
1442; AVX512VL-NEXT:    vzeroupper
1443; AVX512VL-NEXT:    ret{{[l|q]}}
1444;
1445; AVX512DQ-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
1446; AVX512DQ:       # %bb.0:
1447; AVX512DQ-NEXT:    vcvttps2dq %ymm0, %ymm0
1448; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
1449; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1450; AVX512DQ-NEXT:    vzeroupper
1451; AVX512DQ-NEXT:    ret{{[l|q]}}
1452;
1453; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
1454; AVX512DQVL:       # %bb.0:
1455; AVX512DQVL-NEXT:    vcvttps2dq %ymm0, %ymm0
1456; AVX512DQVL-NEXT:    vpmovdw %ymm0, %xmm0
1457; AVX512DQVL-NEXT:    vzeroupper
1458; AVX512DQVL-NEXT:    ret{{[l|q]}}
1459  %ret = call <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f32(<8 x float> %a,
1460                                              metadata !"fpexcept.strict") #0
1461  ret <8 x i16> %ret
1462}
1463
; Strict-FP unsigned conversion of <8 x float> to <8 x i16> through the
; constrained fptoui intrinsic, called with "fpexcept.strict" metadata.
; The recorded output still converts with the signed vcvttps2dq in every
; configuration (presumably because any in-range unsigned 16-bit result is
; also representable as a signed 32-bit lane - NOTE(review): confirm against
; the X86 lowering). Narrowing then differs from the signed test only on
; plain AVX, which packs with the unsigned-saturating vpackusdw; the AVX-512
; configurations use vpmovdw exactly as in the signed variant.
1464define <8 x i16> @strict_vector_fptoui_v8f32_to_v8i16(<8 x float> %a) #0 {
1465; AVX-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
1466; AVX:       # %bb.0:
1467; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
1468; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
1469; AVX-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
1470; AVX-NEXT:    vzeroupper
1471; AVX-NEXT:    ret{{[l|q]}}
1472;
1473; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
1474; AVX512F:       # %bb.0:
1475; AVX512F-NEXT:    vcvttps2dq %ymm0, %ymm0
1476; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
1477; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1478; AVX512F-NEXT:    vzeroupper
1479; AVX512F-NEXT:    ret{{[l|q]}}
1480;
1481; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
1482; AVX512VL:       # %bb.0:
1483; AVX512VL-NEXT:    vcvttps2dq %ymm0, %ymm0
1484; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
1485; AVX512VL-NEXT:    vzeroupper
1486; AVX512VL-NEXT:    ret{{[l|q]}}
1487;
1488; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
1489; AVX512DQ:       # %bb.0:
1490; AVX512DQ-NEXT:    vcvttps2dq %ymm0, %ymm0
1491; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
1492; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1493; AVX512DQ-NEXT:    vzeroupper
1494; AVX512DQ-NEXT:    ret{{[l|q]}}
1495;
1496; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
1497; AVX512DQVL:       # %bb.0:
1498; AVX512DQVL-NEXT:    vcvttps2dq %ymm0, %ymm0
1499; AVX512DQVL-NEXT:    vpmovdw %ymm0, %xmm0
1500; AVX512DQVL-NEXT:    vzeroupper
1501; AVX512DQVL-NEXT:    ret{{[l|q]}}
1502  %ret = call <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f32(<8 x float> %a,
1503                                              metadata !"fpexcept.strict") #0
1504  ret <8 x i16> %ret
1505}
1506
; Strict-FP signed conversion of <8 x float> to <8 x i8> through the
; constrained fptosi intrinsic, called with "fpexcept.strict" metadata.
; Every configuration converts to 32-bit lanes with vcvttps2dq first.
;  * plain AVX narrows in two signed-saturating steps: vpackssdw on the two
;    128-bit halves, then vpacksswb.
;  * AVX512F and AVX512DQ without VL narrow with vpmovdb from zmm to xmm.
;  * the VL configurations narrow with vpmovdb from ymm to xmm.
1507define <8 x i8> @strict_vector_fptosi_v8f32_to_v8i8(<8 x float> %a) #0 {
1508; AVX-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
1509; AVX:       # %bb.0:
1510; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
1511; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
1512; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1513; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
1514; AVX-NEXT:    vzeroupper
1515; AVX-NEXT:    ret{{[l|q]}}
1516;
1517; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
1518; AVX512F:       # %bb.0:
1519; AVX512F-NEXT:    vcvttps2dq %ymm0, %ymm0
1520; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
1521; AVX512F-NEXT:    vzeroupper
1522; AVX512F-NEXT:    ret{{[l|q]}}
1523;
1524; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
1525; AVX512VL:       # %bb.0:
1526; AVX512VL-NEXT:    vcvttps2dq %ymm0, %ymm0
1527; AVX512VL-NEXT:    vpmovdb %ymm0, %xmm0
1528; AVX512VL-NEXT:    vzeroupper
1529; AVX512VL-NEXT:    ret{{[l|q]}}
1530;
1531; AVX512DQ-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
1532; AVX512DQ:       # %bb.0:
1533; AVX512DQ-NEXT:    vcvttps2dq %ymm0, %ymm0
1534; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
1535; AVX512DQ-NEXT:    vzeroupper
1536; AVX512DQ-NEXT:    ret{{[l|q]}}
1537;
1538; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
1539; AVX512DQVL:       # %bb.0:
1540; AVX512DQVL-NEXT:    vcvttps2dq %ymm0, %ymm0
1541; AVX512DQVL-NEXT:    vpmovdb %ymm0, %xmm0
1542; AVX512DQVL-NEXT:    vzeroupper
1543; AVX512DQVL-NEXT:    ret{{[l|q]}}
1544  %ret = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f32(<8 x float> %a,
1545                                              metadata !"fpexcept.strict") #0
1546  ret <8 x i8> %ret
1547}
1548
; Strict-FP unsigned conversion of <8 x float> to <8 x i8> through the
; constrained fptoui intrinsic, called with "fpexcept.strict" metadata.
; As in the other narrow unsigned tests here, the conversion itself is the
; signed vcvttps2dq in every configuration.
;  * plain AVX narrows with a mixed pair of packs: signed-saturating
;    vpackssdw for dword-to-word, then unsigned-saturating vpackuswb for
;    word-to-byte. NOTE(review): the mixed signedness is what the generator
;    recorded; do not "align" it by hand - rerun the update script if the
;    lowering changes.
;  * the AVX-512 configurations narrow with vpmovdb, identical to the
;    signed v8i8 test.
1549define <8 x i8> @strict_vector_fptoui_v8f32_to_v8i8(<8 x float> %a) #0 {
1550; AVX-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
1551; AVX:       # %bb.0:
1552; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
1553; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
1554; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1555; AVX-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
1556; AVX-NEXT:    vzeroupper
1557; AVX-NEXT:    ret{{[l|q]}}
1558;
1559; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
1560; AVX512F:       # %bb.0:
1561; AVX512F-NEXT:    vcvttps2dq %ymm0, %ymm0
1562; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
1563; AVX512F-NEXT:    vzeroupper
1564; AVX512F-NEXT:    ret{{[l|q]}}
1565;
1566; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
1567; AVX512VL:       # %bb.0:
1568; AVX512VL-NEXT:    vcvttps2dq %ymm0, %ymm0
1569; AVX512VL-NEXT:    vpmovdb %ymm0, %xmm0
1570; AVX512VL-NEXT:    vzeroupper
1571; AVX512VL-NEXT:    ret{{[l|q]}}
1572;
1573; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
1574; AVX512DQ:       # %bb.0:
1575; AVX512DQ-NEXT:    vcvttps2dq %ymm0, %ymm0
1576; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
1577; AVX512DQ-NEXT:    vzeroupper
1578; AVX512DQ-NEXT:    ret{{[l|q]}}
1579;
1580; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
1581; AVX512DQVL:       # %bb.0:
1582; AVX512DQVL-NEXT:    vcvttps2dq %ymm0, %ymm0
1583; AVX512DQVL-NEXT:    vpmovdb %ymm0, %xmm0
1584; AVX512DQVL-NEXT:    vzeroupper
1585; AVX512DQVL-NEXT:    ret{{[l|q]}}
1586  %ret = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f32(<8 x float> %a,
1587                                              metadata !"fpexcept.strict") #0
1588  ret <8 x i8> %ret
1589}
1590
; Strict-FP signed conversion of <8 x float> to <8 x i1> through the
; constrained fptosi intrinsic, called with "fpexcept.strict" metadata.
; Every configuration converts to 32-bit lanes with vcvttps2dq first; the
; result is handed back in xmm0 as eight word lanes.
;  * plain AVX emits the same extract + vpackssdw sequence as the signed
;    v8i16 test.
;  * AVX512F / AVX512VL turn the dwords into a mask with vptestmd, rebuild
;    an all-ones-or-zero dword vector under that mask (vpternlogd $255 on
;    zmm, or vpcmpeqd + masked vmovdqa32 on ymm), then narrow with vpmovdw.
;  * AVX512DQ / AVX512DQVL do the same round trip with vpmovd2m / vpmovm2d.
; Unlike the unsigned v8i1 test, no vpslld $31 precedes the mask creation.
1591define <8 x i1> @strict_vector_fptosi_v8f32_to_v8i1(<8 x float> %a) #0 {
1592; AVX-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
1593; AVX:       # %bb.0:
1594; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
1595; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
1596; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
1597; AVX-NEXT:    vzeroupper
1598; AVX-NEXT:    ret{{[l|q]}}
1599;
1600; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
1601; AVX512F:       # %bb.0:
1602; AVX512F-NEXT:    vcvttps2dq %ymm0, %ymm0
1603; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
1604; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1605; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
1606; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1607; AVX512F-NEXT:    vzeroupper
1608; AVX512F-NEXT:    ret{{[l|q]}}
1609;
1610; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
1611; AVX512VL:       # %bb.0:
1612; AVX512VL-NEXT:    vcvttps2dq %ymm0, %ymm0
1613; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
1614; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
1615; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
1616; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
1617; AVX512VL-NEXT:    vzeroupper
1618; AVX512VL-NEXT:    ret{{[l|q]}}
1619;
1620; AVX512DQ-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
1621; AVX512DQ:       # %bb.0:
1622; AVX512DQ-NEXT:    vcvttps2dq %ymm0, %ymm0
1623; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
1624; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
1625; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
1626; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1627; AVX512DQ-NEXT:    vzeroupper
1628; AVX512DQ-NEXT:    ret{{[l|q]}}
1629;
1630; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
1631; AVX512DQVL:       # %bb.0:
1632; AVX512DQVL-NEXT:    vcvttps2dq %ymm0, %ymm0
1633; AVX512DQVL-NEXT:    vpmovd2m %ymm0, %k0
1634; AVX512DQVL-NEXT:    vpmovm2d %k0, %ymm0
1635; AVX512DQVL-NEXT:    vpmovdw %ymm0, %xmm0
1636; AVX512DQVL-NEXT:    vzeroupper
1637; AVX512DQVL-NEXT:    ret{{[l|q]}}
1638  %ret = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f32(<8 x float> %a,
1639                                              metadata !"fpexcept.strict") #0
1640  ret <8 x i1> %ret
1641}
1642
; Strict-FP unsigned conversion of <8 x float> to <8 x i1> through the
; constrained fptoui intrinsic, called with "fpexcept.strict" metadata.
; Every configuration converts with the signed vcvttps2dq first.
;  * plain AVX narrows with extract + vpackusdw (the signed v8i1 test uses
;    vpackssdw instead).
;  * all AVX-512 configurations shift each dword left by 31 (vpslld $31)
;    before forming the mask, so the mask is derived from the low bit of
;    each converted lane; the mask is then expanded back to dwords
;    (vptestmd + vpternlogd / vpcmpeqd + vmovdqa32, or vpmovd2m + vpmovm2d)
;    and narrowed with vpmovdw.
; The assertions are autogenerated; regenerate them rather than hand-editing.
1643define <8 x i1> @strict_vector_fptoui_v8f32_to_v8i1(<8 x float> %a) #0 {
1644; AVX-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
1645; AVX:       # %bb.0:
1646; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
1647; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
1648; AVX-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
1649; AVX-NEXT:    vzeroupper
1650; AVX-NEXT:    ret{{[l|q]}}
1651;
1652; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
1653; AVX512F:       # %bb.0:
1654; AVX512F-NEXT:    vcvttps2dq %ymm0, %ymm0
1655; AVX512F-NEXT:    vpslld $31, %ymm0, %ymm0
1656; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
1657; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1658; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
1659; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1660; AVX512F-NEXT:    vzeroupper
1661; AVX512F-NEXT:    ret{{[l|q]}}
1662;
1663; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
1664; AVX512VL:       # %bb.0:
1665; AVX512VL-NEXT:    vcvttps2dq %ymm0, %ymm0
1666; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
1667; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
1668; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
1669; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
1670; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
1671; AVX512VL-NEXT:    vzeroupper
1672; AVX512VL-NEXT:    ret{{[l|q]}}
1673;
1674; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
1675; AVX512DQ:       # %bb.0:
1676; AVX512DQ-NEXT:    vcvttps2dq %ymm0, %ymm0
1677; AVX512DQ-NEXT:    vpslld $31, %ymm0, %ymm0
1678; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
1679; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
1680; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
1681; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1682; AVX512DQ-NEXT:    vzeroupper
1683; AVX512DQ-NEXT:    ret{{[l|q]}}
1684;
1685; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
1686; AVX512DQVL:       # %bb.0:
1687; AVX512DQVL-NEXT:    vcvttps2dq %ymm0, %ymm0
1688; AVX512DQVL-NEXT:    vpslld $31, %ymm0, %ymm0
1689; AVX512DQVL-NEXT:    vpmovd2m %ymm0, %k0
1690; AVX512DQVL-NEXT:    vpmovm2d %k0, %ymm0
1691; AVX512DQVL-NEXT:    vpmovdw %ymm0, %xmm0
1692; AVX512DQVL-NEXT:    vzeroupper
1693; AVX512DQVL-NEXT:    ret{{[l|q]}}
1694  %ret = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f32(<8 x float> %a,
1695                                              metadata !"fpexcept.strict") #0
1696  ret <8 x i1> %ret
1697}
1698
; Attribute group #0 carries strictfp. The test functions and constrained
; intrinsic call sites in this file reference it via the trailing "#0".
1699attributes #0 = { strictfp }
1700