; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512dq < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ

; Strict (constrained) fdiv on a <1 x float> constant; scalarizes to divss.
define <1 x float> @constrained_vector_fdiv_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fdiv_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    divss {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fdiv_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %div = call <1 x float> @llvm.experimental.constrained.fdiv.v1f32(
           <1 x float> <float 1.000000e+00>,
           <1 x float> <float 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <1 x float> %div
}

; Strict fdiv on <2 x double>; maps directly to (v)divpd.
define <2 x double> @constrained_vector_fdiv_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fdiv_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
; CHECK-NEXT:    divpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fdiv_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
; AVX-NEXT:    vdivpd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <2 x double> %div
}

; Strict fdiv on the odd-sized <3 x float>; scalarized into three divss.
define <3 x float> @constrained_vector_fdiv_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fdiv_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    divss %xmm1, %xmm2
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    divss %xmm1, %xmm0
; CHECK-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    divss %xmm1, %xmm3
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fdiv_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm1
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss %xmm0, %xmm2, %xmm2
; AVX-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss %xmm0, %xmm3, %xmm0
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-NEXT:    retq
entry:
  %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32(
           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x float> %div
}

; Strict fdiv on <3 x double>; split into a divpd pair plus a scalar divsd.
define <3 x double> @constrained_vector_fdiv_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fdiv_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
; CHECK-NEXT:    divpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    divsd {{.*}}(%rip), %xmm1
; CHECK-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movapd %xmm0, %xmm1
; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fdiv_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vdivsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [1.0E+0,2.0E+0]
; AVX-NEXT:    vdivpd {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %div = call <3 x double> @llvm.experimental.constrained.fdiv.v3f64(
           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
           <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x double> %div
}

; Strict fdiv on <4 x double>; two divpd under SSE, one ymm vdivpd under AVX.
define <4 x double> @constrained_vector_fdiv_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fdiv_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm2 = [1.0E+1,1.0E+1]
; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [3.0E+0,4.0E+0]
; CHECK-NEXT:    divpd %xmm2, %xmm1
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
; CHECK-NEXT:    divpd %xmm2, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fdiv_v4f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovapd {{.*#+}} ymm0 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
; AVX1-NEXT:    vdivpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fdiv_v4f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [1.0E+1,1.0E+1,1.0E+1,1.0E+1]
; AVX512-NEXT:    vmovapd {{.*#+}} ymm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
; AVX512-NEXT:    vdivpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
entry:
  %div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(
           <4 x double> <double 1.000000e+00, double 2.000000e+00,
                         double 3.000000e+00, double 4.000000e+00>,
           <4 x double> <double 1.000000e+01, double 1.000000e+01,
                         double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <4 x double> %div
}

; Strict frem on <1 x float>; no frem instruction, so it lowers to a fmodf libcall.
define <1 x float> @constrained_vector_frem_v1f32() #0 {
; CHECK-LABEL: constrained_vector_frem_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_frem_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fmodf
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %rem = call <1 x float> @llvm.experimental.constrained.frem.v1f32(
           <1 x float> <float 1.000000e+00>,
           <1 x float> <float 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <1 x float> %rem
}

; Strict frem on <2 x double>; scalarized into two fmod libcalls with a spill between them.
define <2 x double> @constrained_vector_frem_v2f64() #0 {
; CHECK-LABEL: constrained_vector_frem_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_frem_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %rem = call <2 x double> @llvm.experimental.constrained.frem.v2f64(
           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <2 x double> %rem
}

; Strict frem on <3 x float>; three fmodf libcalls, lanes reassembled afterwards.
define <3 x float> @constrained_vector_frem_v3f32() #0 {
; CHECK-LABEL: constrained_vector_frem_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmodf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_frem_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fmodf
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fmodf
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fmodf
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %rem = call <3 x float> @llvm.experimental.constrained.frem.v3f32(
           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x float> %rem
}

; Strict frem on <3 x double>; three fmod libcalls, third result returned via x87 under SSE.
define <3 x double> @constrained_vector_frem_v3f64() #0 {
; CHECK-LABEL: constrained_vector_frem_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_frem_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %rem = call <3 x double> @llvm.experimental.constrained.frem.v3f64(
           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
           <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x double> %rem
}

; Strict frem on <4 x double>; four fmod libcalls, results packed back into two xmm / one ymm.
define <4 x double> @constrained_vector_frem_v4f64() #0 {
; CHECK-LABEL: constrained_vector_frem_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmod
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_frem_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmod
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
  %rem = call <4 x double> @llvm.experimental.constrained.frem.v4f64(
           <4 x double> <double 1.000000e+00, double 2.000000e+00,
                         double 3.000000e+00, double 4.000000e+00>,
           <4 x double> <double 1.000000e+01, double 1.000000e+01,
                         double 1.000000e+01, double 1.000000e+01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <4 x double> %rem
}

; Strict fmul on <1 x float> with an infinity operand; must not be constant-folded.
define <1 x float> @constrained_vector_fmul_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fmul_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    mulss {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fmul_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %mul = call <1 x float> @llvm.experimental.constrained.fmul.v1f32(
           <1 x float> <float 0x7FF0000000000000>,
           <1 x float> <float 2.000000e+00>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <1 x float> %mul
}

; Strict fmul on <2 x double> at DBL_MAX (overflow-sensitive); lowers to (v)mulpd.
define <2 x double> @constrained_vector_fmul_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fmul_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; CHECK-NEXT:    mulpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fmul_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT:    vmulpd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <2 x double> <double 2.000000e+00, double 3.000000e+00>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <2 x double> %mul
}

; Strict fmul on <3 x float> with infinity operands; scalarized into three mulss.
define <3 x float> @constrained_vector_fmul_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fmul_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    mulss %xmm1, %xmm2
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    mulss %xmm1, %xmm0
; CHECK-NEXT:    mulss {{.*}}(%rip), %xmm1
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fmul_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm2
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-NEXT:    retq
entry:
  %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(
           <3 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000,
                        float 0x7FF0000000000000>,
           <3 x float> <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x float> %mul
}

; Strict fmul on <3 x double> at DBL_MAX; mulpd pair plus scalar mulsd.
define <3 x double> @constrained_vector_fmul_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fmul_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; CHECK-NEXT:    mulpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    mulsd {{.*}}(%rip), %xmm1
; CHECK-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movapd %xmm0, %xmm1
; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fmul_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmulsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT:    vmulpd {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %mul = call <3 x double> @llvm.experimental.constrained.fmul.v3f64(
           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
                         double 0x7FEFFFFFFFFFFFFF>,
           <3 x double> <double 1.000000e+00, double 1.000000e+01, double 1.000000e+02>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x double> %mul
}

; Strict fmul on <4 x double> at DBL_MAX; two mulpd under SSE, one ymm vmulpd under AVX.
define <4 x double> @constrained_vector_fmul_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fmul_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [4.0E+0,5.0E+0]
; CHECK-NEXT:    mulpd %xmm0, %xmm1
; CHECK-NEXT:    mulpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fmul_v4f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovapd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
; AVX1-NEXT:    vmulpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fmul_v4f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
; AVX512-NEXT:    vmulpd {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <4 x double> <double 2.000000e+00, double 3.000000e+00,
                         double 4.000000e+00, double 5.000000e+00>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <4 x double> %mul
}

; Strict fadd on <1 x float> with an infinity operand; lowers to (v)addss.
define <1 x float> @constrained_vector_fadd_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fadd_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    addss {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fadd_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32(
           <1 x float> <float 0x7FF0000000000000>,
           <1 x float> <float 1.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <1 x float> %add
}

; Strict fadd on <2 x double> at DBL_MAX (overflow/inexact-sensitive); lowers to (v)addpd.
define <2 x double> @constrained_vector_fadd_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fadd_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; CHECK-NEXT:    addpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fadd_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT:    vaddpd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <2 x double> %add
}

; Strict fadd on <3 x float>; scalarized into three addss (one operand is +0.0).
define <3 x float> @constrained_vector_fadd_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fadd_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    addss %xmm2, %xmm1
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    addss %xmm2, %xmm0
; CHECK-NEXT:    addss {{.*}}(%rip), %xmm2
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fadd_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm1, %xmm2
; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
entry:
  %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(
           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
                        float 0xFFFFFFFFE0000000>,
           <3 x float> <float 2.0, float 1.0, float 0.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x float> %add
}

; Strict fadd on <3 x double> at DBL_MAX; addpd pair plus scalar addsd (third operand is +0.0).
define <3 x double> @constrained_vector_fadd_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fadd_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; CHECK-NEXT:    addpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    xorpd %xmm1, %xmm1
; CHECK-NEXT:    addsd {{.*}}(%rip), %xmm1
; CHECK-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movapd %xmm0, %xmm1
; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fadd_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
; AVX-NEXT:    vaddpd {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %add = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(
           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
                         double 0x7FEFFFFFFFFFFFFF>,
           <3 x double> <double 2.0, double 1.0, double 0.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x double> %add
}

; Strict fadd on <4 x double> at DBL_MAX; two addpd under SSE, one ymm vaddpd under AVX.
define <4 x double> @constrained_vector_fadd_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fadd_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [2.0E+0,2.0000000000000001E-1]
; CHECK-NEXT:    addpd %xmm0, %xmm1
; CHECK-NEXT:    addpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fadd_v4f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovapd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
; AVX1-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fadd_v4f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
; AVX512-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
           <4 x double> <double 1.000000e+00, double 1.000000e-01,
                         double 2.000000e+00, double 2.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <4 x double> %add
}

; Strict fsub on <1 x float> with an infinity operand; lowers to (v)subss.
define <1 x float> @constrained_vector_fsub_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fsub_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    subss {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fsub_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32(
           <1 x float> <float 0x7FF0000000000000>,
           <1 x float> <float 1.000000e+00>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <1 x float> %sub
}

; Strict fsub on <2 x double> at -DBL_MAX (overflow-sensitive); lowers to (v)subpd.
define <2 x double> @constrained_vector_fsub_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fsub_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; CHECK-NEXT:    subpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fsub_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX-NEXT:    vsubpd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
           <2 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <2 x double> %sub
}

; Strict-FP fsub of an odd-sized <3 x float>: scalarized into three subss/vsubss
; ops and reassembled with unpck/insertps shuffles.
define <3 x float> @constrained_vector_fsub_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fsub_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    movaps %xmm1, %xmm2
; CHECK-NEXT:    subss %xmm0, %xmm2
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    subss {{.*}}(%rip), %xmm0
; CHECK-NEXT:    subss {{.*}}(%rip), %xmm1
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fsub_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vsubss {{.*}}(%rip), %xmm1, %xmm2
; AVX-NEXT:    vsubss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
entry:
  %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(
           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
                        float 0xFFFFFFFFE0000000>,
           <3 x float> <float 2.0, float 1.0, float 0.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x float> %sub
}
754
; Strict-FP fsub of <3 x double>: split into a scalar subsd plus a packed subpd;
; on SSE the third lane is returned through the x87 stack (fldl), on AVX the
; result is assembled into a ymm with vinsertf128.
define <3 x double> @constrained_vector_fsub_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fsub_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorpd %xmm0, %xmm0
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    subsd %xmm0, %xmm1
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; CHECK-NEXT:    subpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movapd %xmm0, %xmm1
; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fsub_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX-NEXT:    vsubpd {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64(
           <3 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
                         double 0xFFEFFFFFFFFFFFFF>,
           <3 x double> <double 2.0, double 1.0, double 0.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x double> %sub
}
788
; Strict-FP fsub of <4 x double>: two 128-bit subpd on SSE, one 256-bit vsubpd
; on AVX (AVX512 materializes the splat constant with vbroadcastsd).
define <4 x double> @constrained_vector_fsub_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fsub_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; CHECK-NEXT:    movapd %xmm0, %xmm1
; CHECK-NEXT:    subpd {{.*}}(%rip), %xmm1
; CHECK-NEXT:    subpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fsub_v4f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovapd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX1-NEXT:    vsubpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fsub_v4f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX512-NEXT:    vsubpd {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
           <4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
                         double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
           <4 x double> <double 1.000000e+00, double 1.000000e-01,
                         double 2.000000e+00, double 2.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <4 x double> %sub
}
819
; Strict-FP sqrt of <1 x float>: scalar sqrtss/vsqrtss, no constant folding.
define <1 x float> @constrained_vector_sqrt_v1f32() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    sqrtss %xmm0, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sqrt_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %sqrt = call <1 x float> @llvm.experimental.constrained.sqrt.v1f32(
                              <1 x float> <float 42.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <1 x float> %sqrt
}
839
; Strict-FP sqrt of <2 x double>: packed sqrtpd/vsqrtpd with a folded load.
define <2 x double> @constrained_vector_sqrt_v2f64() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sqrtpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sqrt_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vsqrtpd {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
                              <2 x double> <double 42.0, double 42.1>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <2 x double> %sqrt
}
857
; Strict-FP sqrt of an odd-sized <3 x float>: scalarized into three sqrtss ops
; and reassembled with unpck/insertps shuffles.
define <3 x float> @constrained_vector_sqrt_v3f32() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    sqrtss %xmm0, %xmm1
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    sqrtss %xmm0, %xmm0
; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    sqrtss %xmm2, %xmm2
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sqrt_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vsqrtss %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
entry:
  %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %sqrt
}
889
; Strict-FP sqrt of <3 x double>: scalar sqrtsd plus packed sqrtpd; the third
; lane is returned via the x87 stack (fldl) on SSE, via vinsertf128 on AVX.
define <3 x double> @constrained_vector_sqrt_v3f64() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    sqrtsd %xmm0, %xmm1
; CHECK-NEXT:    sqrtpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movapd %xmm0, %xmm1
; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sqrt_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vsqrtpd {{.*}}(%rip), %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %sqrt = call <3 x double> @llvm.experimental.constrained.sqrt.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %sqrt
}
917
; Strict-FP sqrt of <4 x double>: two 128-bit sqrtpd on SSE, one 256-bit
; vsqrtpd on AVX.
define <4 x double> @constrained_vector_sqrt_v4f64() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sqrtpd {{.*}}(%rip), %xmm1
; CHECK-NEXT:    sqrtpd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sqrt_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vsqrtpd {{.*}}(%rip), %ymm0
; AVX-NEXT:    retq
entry:
  %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
                              <4 x double> <double 42.0, double 42.1,
                                            double 42.2, double 42.3>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <4 x double> %sqrt
}
937
; Strict-FP pow of <1 x float>: no ISA instruction exists, so it lowers to a
; powf libcall with both operands loaded from constant-pool memory.
define <1 x float> @constrained_vector_pow_v1f32() #0 {
; CHECK-LABEL: constrained_vector_pow_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq powf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_pow_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq powf
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %pow = call <1 x float> @llvm.experimental.constrained.pow.v1f32(
                             <1 x float> <float 42.0>,
                             <1 x float> <float 3.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %pow
}
968
; Strict-FP pow of <2 x double>: two pow libcalls; the first result is spilled
; across the second call and the vector is rebuilt with unpcklpd.
define <2 x double> @constrained_vector_pow_v2f64() #0 {
; CHECK-LABEL: constrained_vector_pow_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq pow
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq pow
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_pow_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq pow
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq pow
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64(
                             <2 x double> <double 42.1, double 42.2>,
                             <2 x double> <double 3.0, double 3.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <2 x double> %pow
}
1011
; Strict-FP pow of <3 x float>: three powf libcalls with the intermediate
; results spilled, then reassembled with unpcklps/insertps.
define <3 x float> @constrained_vector_pow_v3f32() #0 {
; CHECK-LABEL: constrained_vector_pow_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq powf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq powf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq powf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_pow_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq powf
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq powf
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq powf
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %pow = call <3 x float> @llvm.experimental.constrained.pow.v3f32(
                             <3 x float> <float 42.0, float 43.0, float 44.0>,
                             <3 x float> <float 3.0, float 3.0, float 3.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <3 x float> %pow
}
1067
; Strict-FP pow of <3 x double>: three pow libcalls; SSE returns the third
; lane via fldl, AVX spills a ymm and issues vzeroupper before the last call.
define <3 x double> @constrained_vector_pow_v3f64() #0 {
; CHECK-LABEL: constrained_vector_pow_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq pow
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq pow
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq pow
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_pow_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq pow
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq pow
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq pow
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %pow = call <3 x double> @llvm.experimental.constrained.pow.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          <3 x double> <double 3.0, double 3.0, double 3.0>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %pow
}
1126
; Strict-FP pow of <4 x double>: four pow libcalls, rebuilt into xmm0/xmm1 on
; SSE or a single ymm0 (vinsertf128) on AVX.
define <4 x double> @constrained_vector_pow_v4f64() #0 {
; CHECK-LABEL: constrained_vector_pow_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq pow
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq pow
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq pow
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq pow
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_pow_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq pow
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq pow
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq pow
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq pow
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %pow = call <4 x double> @llvm.experimental.constrained.pow.v4f64(
                             <4 x double> <double 42.1, double 42.2,
                                           double 42.3, double 42.4>,
                             <4 x double> <double 3.0, double 3.0,
                                           double 3.0, double 3.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <4 x double> %pow
}
1194
; Strict-FP powi of <1 x float>: lowers to the compiler-rt __powisf2 libcall
; with the constant i32 exponent (3) passed in %edi.
define <1 x float> @constrained_vector_powi_v1f32() #0 {
; CHECK-LABEL: constrained_vector_powi_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powisf2
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_powi_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powisf2
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %powi = call <1 x float> @llvm.experimental.constrained.powi.v1f32(
                              <1 x float> <float 42.0>,
                              i32 3,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <1 x float> %powi
}
1225
; Strict-FP powi of <2 x double>: two __powidf2 libcalls, results rejoined
; with unpcklpd.
define <2 x double> @constrained_vector_powi_v2f64() #0 {
; CHECK-LABEL: constrained_vector_powi_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powidf2
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powidf2
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_powi_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powidf2
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powidf2
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64(
                              <2 x double> <double 42.1, double 42.2>,
                              i32 3,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <2 x double> %powi
}
1268
; Strict-FP powi of <3 x float>: three __powisf2 libcalls with spills, then
; reassembly via unpcklps/insertps.
define <3 x float> @constrained_vector_powi_v3f32() #0 {
; CHECK-LABEL: constrained_vector_powi_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powisf2
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powisf2
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powisf2
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_powi_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powisf2
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powisf2
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powisf2
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %powi = call <3 x float> @llvm.experimental.constrained.powi.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              i32 3,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %powi
}
1324
; Strict-FP powi of <3 x double>: three __powidf2 libcalls; SSE returns the
; third lane via fldl, AVX spills a ymm and issues vzeroupper before the last
; call, then rejoins with vinsertf128.
define <3 x double> @constrained_vector_powi_v3f64() #0 {
; CHECK-LABEL: constrained_vector_powi_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powidf2
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powidf2
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powidf2
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_powi_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powidf2
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powidf2
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq __powidf2
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %powi = call <3 x double> @llvm.experimental.constrained.powi.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          i32 3,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %powi
}
1383
; Strict-FP powi of <4 x double>: four __powidf2 libcalls, rebuilt into
; xmm0/xmm1 on SSE or a single ymm0 (vinsertf128) on AVX.
define <4 x double> @constrained_vector_powi_v4f64() #0 {
; CHECK-LABEL: constrained_vector_powi_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powidf2
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powidf2
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powidf2
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powidf2
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_powi_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powidf2
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powidf2
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powidf2
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powidf2
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %powi = call <4 x double> @llvm.experimental.constrained.powi.v4f64(
                              <4 x double> <double 42.1, double 42.2,
                                            double 42.3, double 42.4>,
                              i32 3,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <4 x double> %powi
}
1450
; Verify that the strict-FP sin intrinsic on a single-element <1 x float> is
; lowered to one sinf libcall with the result returned directly in xmm0.
define <1 x float> @constrained_vector_sin_v1f32() #0 {
; CHECK-LABEL: constrained_vector_sin_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq sinf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sin_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq sinf
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %sin = call <1 x float> @llvm.experimental.constrained.sin.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %sin
}
1478
; Verify that the strict-FP sin intrinsic on <2 x double> is scalarized into
; two sin libcalls, with the results recombined via unpcklpd / vunpcklpd.
define <2 x double> @constrained_vector_sin_v2f64() #0 {
; CHECK-LABEL: constrained_vector_sin_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq sin
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq sin
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sin_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq sin
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq sin
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64(
                             <2 x double> <double 42.0, double 42.1>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <2 x double> %sin
}
1516
; Verify that the strict-FP sin intrinsic on the odd-sized <3 x float> is
; scalarized into three sinf libcalls; SSE rebuilds the vector with
; unpcklps+unpcklpd, AVX with vinsertps.
define <3 x float> @constrained_vector_sin_v3f32() #0 {
; CHECK-LABEL: constrained_vector_sin_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq sinf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq sinf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq sinf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sin_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq sinf
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq sinf
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq sinf
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %sin = call <3 x float> @llvm.experimental.constrained.sin.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %sin
}
1565
; Verify that the strict-FP sin intrinsic on <3 x double> is scalarized into
; three sin libcalls. SSE returns the third element on the x87 stack (fldl,
; followed by wait to keep strict exception ordering) with the first two in
; xmm0/xmm1; AVX assembles all three into a ymm via vinsertf128.
define <3 x double> @constrained_vector_sin_v3f64() #0 {
; CHECK-LABEL: constrained_vector_sin_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq sin
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq sin
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq sin
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sin_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq sin
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq sin
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq sin
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %sin = call <3 x double> @llvm.experimental.constrained.sin.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %sin
}
1617
; Verify that the strict-FP sin intrinsic on <4 x double> is scalarized into
; four sin libcalls; results are paired with unpcklpd (SSE returns xmm0/xmm1)
; or vunpcklpd+vinsertf128 into one ymm (AVX).
define <4 x double> @constrained_vector_sin_v4f64() #0 {
; CHECK-LABEL: constrained_vector_sin_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq sin
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq sin
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq sin
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq sin
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sin_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq sin
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq sin
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq sin
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq sin
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %sin = call <4 x double> @llvm.experimental.constrained.sin.v4f64(
                             <4 x double> <double 42.0, double 42.1,
                                           double 42.2, double 42.3>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <4 x double> %sin
}
1675
; Verify that the strict-FP cos intrinsic on a single-element <1 x float> is
; lowered to one cosf libcall with the result returned directly in xmm0.
define <1 x float> @constrained_vector_cos_v1f32() #0 {
; CHECK-LABEL: constrained_vector_cos_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq cosf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_cos_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq cosf
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %cos = call <1 x float> @llvm.experimental.constrained.cos.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %cos
}
1703
; Verify that the strict-FP cos intrinsic on <2 x double> is scalarized into
; two cos libcalls, with the results recombined via unpcklpd / vunpcklpd.
define <2 x double> @constrained_vector_cos_v2f64() #0 {
; CHECK-LABEL: constrained_vector_cos_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq cos
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq cos
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_cos_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq cos
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq cos
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64(
                             <2 x double> <double 42.0, double 42.1>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <2 x double> %cos
}
1741
; Verify that the strict-FP cos intrinsic on the odd-sized <3 x float> is
; scalarized into three cosf libcalls; SSE rebuilds the vector with
; unpcklps+unpcklpd, AVX with vinsertps.
define <3 x float> @constrained_vector_cos_v3f32() #0 {
; CHECK-LABEL: constrained_vector_cos_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq cosf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq cosf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq cosf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_cos_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq cosf
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq cosf
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq cosf
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %cos = call <3 x float> @llvm.experimental.constrained.cos.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %cos
}
1790
; Verify that the strict-FP cos intrinsic on <3 x double> is scalarized into
; three cos libcalls. SSE returns the third element on the x87 stack (fldl,
; followed by wait to keep strict exception ordering) with the first two in
; xmm0/xmm1; AVX assembles all three into a ymm via vinsertf128.
define <3 x double> @constrained_vector_cos_v3f64() #0 {
; CHECK-LABEL: constrained_vector_cos_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq cos
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq cos
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq cos
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_cos_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq cos
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq cos
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq cos
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %cos = call <3 x double> @llvm.experimental.constrained.cos.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %cos
}
1842
; Verify that the strict-FP cos intrinsic on <4 x double> is scalarized into
; four cos libcalls; results are paired with unpcklpd (SSE returns xmm0/xmm1)
; or vunpcklpd+vinsertf128 into one ymm (AVX).
define <4 x double> @constrained_vector_cos_v4f64() #0 {
; CHECK-LABEL: constrained_vector_cos_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq cos
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq cos
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq cos
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq cos
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_cos_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq cos
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq cos
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq cos
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq cos
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %cos = call <4 x double> @llvm.experimental.constrained.cos.v4f64(
                             <4 x double> <double 42.0, double 42.1,
                                           double 42.2, double 42.3>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <4 x double> %cos
}
1900
; Verify that the strict-FP exp intrinsic on a single-element <1 x float> is
; lowered to one expf libcall with the result returned directly in xmm0.
define <1 x float> @constrained_vector_exp_v1f32() #0 {
; CHECK-LABEL: constrained_vector_exp_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq expf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_exp_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq expf
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %exp = call <1 x float> @llvm.experimental.constrained.exp.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %exp
}
1928
; Verify that the strict-FP exp intrinsic on <2 x double> is scalarized into
; two exp libcalls, with the results recombined via unpcklpd / vunpcklpd.
define <2 x double> @constrained_vector_exp_v2f64() #0 {
; CHECK-LABEL: constrained_vector_exp_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq exp
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq exp
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_exp_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq exp
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq exp
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64(
                             <2 x double> <double 42.0, double 42.1>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <2 x double> %exp
}
1966
; Verify that the strict-FP exp intrinsic on the odd-sized <3 x float> is
; scalarized into three expf libcalls; SSE rebuilds the vector with
; unpcklps+unpcklpd, AVX with vinsertps.
define <3 x float> @constrained_vector_exp_v3f32() #0 {
; CHECK-LABEL: constrained_vector_exp_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq expf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq expf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq expf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_exp_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq expf
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq expf
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq expf
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %exp = call <3 x float> @llvm.experimental.constrained.exp.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %exp
}
2015
; Verify that the strict-FP exp intrinsic on <3 x double> is scalarized into
; three exp libcalls. SSE returns the third element on the x87 stack (fldl,
; followed by wait to keep strict exception ordering) with the first two in
; xmm0/xmm1; AVX assembles all three into a ymm via vinsertf128.
define <3 x double> @constrained_vector_exp_v3f64() #0 {
; CHECK-LABEL: constrained_vector_exp_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq exp
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq exp
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq exp
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_exp_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq exp
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq exp
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq exp
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %exp = call <3 x double> @llvm.experimental.constrained.exp.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %exp
}
2067
; Verify that the strict-FP exp intrinsic on <4 x double> is scalarized into
; four exp libcalls; results are paired with unpcklpd (SSE returns xmm0/xmm1)
; or vunpcklpd+vinsertf128 into one ymm (AVX).
define <4 x double> @constrained_vector_exp_v4f64() #0 {
; CHECK-LABEL: constrained_vector_exp_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq exp
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq exp
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq exp
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq exp
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_exp_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq exp
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq exp
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq exp
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq exp
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %exp = call <4 x double> @llvm.experimental.constrained.exp.v4f64(
                             <4 x double> <double 42.0, double 42.1,
                                           double 42.2, double 42.3>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <4 x double> %exp
}
2125
; Verify that the strict-FP exp2 intrinsic on a single-element <1 x float> is
; lowered to one exp2f libcall with the result returned directly in xmm0.
define <1 x float> @constrained_vector_exp2_v1f32() #0 {
; CHECK-LABEL: constrained_vector_exp2_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq exp2f
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_exp2_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq exp2f
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %exp2 = call <1 x float> @llvm.experimental.constrained.exp2.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %exp2
}
2153
2154define <2 x double> @constrained_vector_exp2_v2f64() #0 {
2155; CHECK-LABEL: constrained_vector_exp2_v2f64:
2156; CHECK:       # %bb.0: # %entry
2157; CHECK-NEXT:    subq $24, %rsp
2158; CHECK-NEXT:    .cfi_def_cfa_offset 32
2159; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2160; CHECK-NEXT:    callq exp2
2161; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2162; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2163; CHECK-NEXT:    callq exp2
2164; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2165; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
2166; CHECK-NEXT:    addq $24, %rsp
2167; CHECK-NEXT:    .cfi_def_cfa_offset 8
2168; CHECK-NEXT:    retq
2169;
2170; AVX-LABEL: constrained_vector_exp2_v2f64:
2171; AVX:       # %bb.0: # %entry
2172; AVX-NEXT:    subq $24, %rsp
2173; AVX-NEXT:    .cfi_def_cfa_offset 32
2174; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2175; AVX-NEXT:    callq exp2
2176; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2177; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2178; AVX-NEXT:    callq exp2
2179; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2180; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2181; AVX-NEXT:    addq $24, %rsp
2182; AVX-NEXT:    .cfi_def_cfa_offset 8
2183; AVX-NEXT:    retq
; Strict exp2 on <2 x double>: scalarized into two exp2 libcalls, with the
; lane results recombined via unpcklpd (see CHECK lines above).
2184entry:
2185  %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64(
2186                              <2 x double> <double 42.1, double 42.0>,
2187                              metadata !"round.dynamic",
2188                              metadata !"fpexcept.strict") #0
2189  ret <2 x double> %exp2
2190}
2191
2192define <3 x float> @constrained_vector_exp2_v3f32() #0 {
2193; CHECK-LABEL: constrained_vector_exp2_v3f32:
2194; CHECK:       # %bb.0: # %entry
2195; CHECK-NEXT:    subq $40, %rsp
2196; CHECK-NEXT:    .cfi_def_cfa_offset 48
2197; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2198; CHECK-NEXT:    callq exp2f
2199; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2200; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2201; CHECK-NEXT:    callq exp2f
2202; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2203; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2204; CHECK-NEXT:    callq exp2f
2205; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
2206; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2207; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2208; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
2209; CHECK-NEXT:    movaps %xmm1, %xmm0
2210; CHECK-NEXT:    addq $40, %rsp
2211; CHECK-NEXT:    .cfi_def_cfa_offset 8
2212; CHECK-NEXT:    retq
2213;
2214; AVX-LABEL: constrained_vector_exp2_v3f32:
2215; AVX:       # %bb.0: # %entry
2216; AVX-NEXT:    subq $40, %rsp
2217; AVX-NEXT:    .cfi_def_cfa_offset 48
2218; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2219; AVX-NEXT:    callq exp2f
2220; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2221; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2222; AVX-NEXT:    callq exp2f
2223; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2224; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2225; AVX-NEXT:    callq exp2f
2226; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
2227; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
2228; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
2229; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
2230; AVX-NEXT:    addq $40, %rsp
2231; AVX-NEXT:    .cfi_def_cfa_offset 8
2232; AVX-NEXT:    retq
; Odd-width case: strict exp2 on <3 x float> scalarizes to three exp2f
; libcalls; lanes are rebuilt with unpcklps/unpcklpd (SSE) or vinsertps (AVX).
2233entry:
2234  %exp2 = call <3 x float> @llvm.experimental.constrained.exp2.v3f32(
2235                              <3 x float> <float 42.0, float 43.0, float 44.0>,
2236                              metadata !"round.dynamic",
2237                              metadata !"fpexcept.strict") #0
2238  ret <3 x float> %exp2
2239}
2240
2241define <3 x double> @constrained_vector_exp2_v3f64() #0 {
2242; CHECK-LABEL: constrained_vector_exp2_v3f64:
2243; CHECK:       # %bb.0: # %entry
2244; CHECK-NEXT:    subq $24, %rsp
2245; CHECK-NEXT:    .cfi_def_cfa_offset 32
2246; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2247; CHECK-NEXT:    callq exp2
2248; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2249; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2250; CHECK-NEXT:    callq exp2
2251; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
2252; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2253; CHECK-NEXT:    callq exp2
2254; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
2255; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
2256; CHECK-NEXT:    wait
2257; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
2258; CHECK-NEXT:    # xmm0 = mem[0],zero
2259; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
2260; CHECK-NEXT:    # xmm1 = mem[0],zero
2261; CHECK-NEXT:    addq $24, %rsp
2262; CHECK-NEXT:    .cfi_def_cfa_offset 8
2263; CHECK-NEXT:    retq
2264;
2265; AVX-LABEL: constrained_vector_exp2_v3f64:
2266; AVX:       # %bb.0: # %entry
2267; AVX-NEXT:    subq $56, %rsp
2268; AVX-NEXT:    .cfi_def_cfa_offset 64
2269; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2270; AVX-NEXT:    callq exp2
2271; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2272; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2273; AVX-NEXT:    callq exp2
2274; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2275; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2276; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
2277; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2278; AVX-NEXT:    vzeroupper
2279; AVX-NEXT:    callq exp2
2280; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
2281; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
2282; AVX-NEXT:    addq $56, %rsp
2283; AVX-NEXT:    .cfi_def_cfa_offset 8
2284; AVX-NEXT:    retq
; Strict exp2 on <3 x double>: three exp2 libcalls. In the SSE lowering the
; third element is returned via the x87 stack (fldl + wait above); AVX builds
; a ymm result with vinsertf128 and emits vzeroupper before the last call.
2285entry:
2286  %exp2 = call <3 x double> @llvm.experimental.constrained.exp2.v3f64(
2287                          <3 x double> <double 42.0, double 42.1, double 42.2>,
2288                          metadata !"round.dynamic",
2289                          metadata !"fpexcept.strict") #0
2290  ret <3 x double> %exp2
2291}
2292
2293define <4 x double> @constrained_vector_exp2_v4f64() #0 {
2294; CHECK-LABEL: constrained_vector_exp2_v4f64:
2295; CHECK:       # %bb.0: # %entry
2296; CHECK-NEXT:    subq $40, %rsp
2297; CHECK-NEXT:    .cfi_def_cfa_offset 48
2298; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2299; CHECK-NEXT:    callq exp2
2300; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2301; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2302; CHECK-NEXT:    callq exp2
2303; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2304; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
2305; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2306; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2307; CHECK-NEXT:    callq exp2
2308; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2309; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2310; CHECK-NEXT:    callq exp2
2311; CHECK-NEXT:    movaps %xmm0, %xmm1
2312; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2313; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
2314; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
2315; CHECK-NEXT:    addq $40, %rsp
2316; CHECK-NEXT:    .cfi_def_cfa_offset 8
2317; CHECK-NEXT:    retq
2318;
2319; AVX-LABEL: constrained_vector_exp2_v4f64:
2320; AVX:       # %bb.0: # %entry
2321; AVX-NEXT:    subq $40, %rsp
2322; AVX-NEXT:    .cfi_def_cfa_offset 48
2323; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2324; AVX-NEXT:    callq exp2
2325; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2326; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2327; AVX-NEXT:    callq exp2
2328; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2329; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2330; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2331; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2332; AVX-NEXT:    callq exp2
2333; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2334; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2335; AVX-NEXT:    callq exp2
2336; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
2337; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2338; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
2339; AVX-NEXT:    addq $40, %rsp
2340; AVX-NEXT:    .cfi_def_cfa_offset 8
2341; AVX-NEXT:    retq
; Strict exp2 on <4 x double>: four exp2 libcalls; SSE returns two xmm halves
; merged with unpcklpd, AVX merges them into one ymm with vinsertf128.
2342entry:
2343  %exp2 = call <4 x double> @llvm.experimental.constrained.exp2.v4f64(
2344                              <4 x double> <double 42.1, double 42.2,
2345                                            double 42.3, double 42.4>,
2346                              metadata !"round.dynamic",
2347                              metadata !"fpexcept.strict") #0
2348  ret <4 x double> %exp2
2349}
2350
2351define <1 x float> @constrained_vector_log_v1f32() #0 {
2352; CHECK-LABEL: constrained_vector_log_v1f32:
2353; CHECK:       # %bb.0: # %entry
2354; CHECK-NEXT:    pushq %rax
2355; CHECK-NEXT:    .cfi_def_cfa_offset 16
2356; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2357; CHECK-NEXT:    callq logf
2358; CHECK-NEXT:    popq %rax
2359; CHECK-NEXT:    .cfi_def_cfa_offset 8
2360; CHECK-NEXT:    retq
2361;
2362; AVX-LABEL: constrained_vector_log_v1f32:
2363; AVX:       # %bb.0: # %entry
2364; AVX-NEXT:    pushq %rax
2365; AVX-NEXT:    .cfi_def_cfa_offset 16
2366; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2367; AVX-NEXT:    callq logf
2368; AVX-NEXT:    popq %rax
2369; AVX-NEXT:    .cfi_def_cfa_offset 8
2370; AVX-NEXT:    retq
; Strict log on a <1 x float> operand: lowered to a single logf libcall
; per the CHECK lines above.
2371entry:
2372  %log = call <1 x float> @llvm.experimental.constrained.log.v1f32(
2373                             <1 x float> <float 42.0>,
2374                             metadata !"round.dynamic",
2375                             metadata !"fpexcept.strict") #0
2376  ret <1 x float> %log
2377}
2378
2379define <2 x double> @constrained_vector_log_v2f64() #0 {
2380; CHECK-LABEL: constrained_vector_log_v2f64:
2381; CHECK:       # %bb.0: # %entry
2382; CHECK-NEXT:    subq $24, %rsp
2383; CHECK-NEXT:    .cfi_def_cfa_offset 32
2384; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2385; CHECK-NEXT:    callq log
2386; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2387; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2388; CHECK-NEXT:    callq log
2389; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2390; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
2391; CHECK-NEXT:    addq $24, %rsp
2392; CHECK-NEXT:    .cfi_def_cfa_offset 8
2393; CHECK-NEXT:    retq
2394;
2395; AVX-LABEL: constrained_vector_log_v2f64:
2396; AVX:       # %bb.0: # %entry
2397; AVX-NEXT:    subq $24, %rsp
2398; AVX-NEXT:    .cfi_def_cfa_offset 32
2399; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2400; AVX-NEXT:    callq log
2401; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2402; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2403; AVX-NEXT:    callq log
2404; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2405; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2406; AVX-NEXT:    addq $24, %rsp
2407; AVX-NEXT:    .cfi_def_cfa_offset 8
2408; AVX-NEXT:    retq
; Strict log on <2 x double>: scalarized into two log libcalls, lanes
; recombined via unpcklpd (see CHECK lines above).
2409entry:
2410  %log = call <2 x double> @llvm.experimental.constrained.log.v2f64(
2411                             <2 x double> <double 42.0, double 42.1>,
2412                             metadata !"round.dynamic",
2413                             metadata !"fpexcept.strict") #0
2414  ret <2 x double> %log
2415}
2416
2417define <3 x float> @constrained_vector_log_v3f32() #0 {
2418; CHECK-LABEL: constrained_vector_log_v3f32:
2419; CHECK:       # %bb.0: # %entry
2420; CHECK-NEXT:    subq $40, %rsp
2421; CHECK-NEXT:    .cfi_def_cfa_offset 48
2422; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2423; CHECK-NEXT:    callq logf
2424; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2425; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2426; CHECK-NEXT:    callq logf
2427; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2428; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2429; CHECK-NEXT:    callq logf
2430; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
2431; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2432; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2433; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
2434; CHECK-NEXT:    movaps %xmm1, %xmm0
2435; CHECK-NEXT:    addq $40, %rsp
2436; CHECK-NEXT:    .cfi_def_cfa_offset 8
2437; CHECK-NEXT:    retq
2438;
2439; AVX-LABEL: constrained_vector_log_v3f32:
2440; AVX:       # %bb.0: # %entry
2441; AVX-NEXT:    subq $40, %rsp
2442; AVX-NEXT:    .cfi_def_cfa_offset 48
2443; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2444; AVX-NEXT:    callq logf
2445; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2446; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2447; AVX-NEXT:    callq logf
2448; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2449; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2450; AVX-NEXT:    callq logf
2451; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
2452; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
2453; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
2454; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
2455; AVX-NEXT:    addq $40, %rsp
2456; AVX-NEXT:    .cfi_def_cfa_offset 8
2457; AVX-NEXT:    retq
; Odd-width case: strict log on <3 x float> scalarizes to three logf
; libcalls; lanes rebuilt with unpcklps/unpcklpd (SSE) or vinsertps (AVX).
2458entry:
2459  %log = call <3 x float> @llvm.experimental.constrained.log.v3f32(
2460                              <3 x float> <float 42.0, float 43.0, float 44.0>,
2461                              metadata !"round.dynamic",
2462                              metadata !"fpexcept.strict") #0
2463  ret <3 x float> %log
2464}
2465
2466define <3 x double> @constrained_vector_log_v3f64() #0 {
2467; CHECK-LABEL: constrained_vector_log_v3f64:
2468; CHECK:       # %bb.0: # %entry
2469; CHECK-NEXT:    subq $24, %rsp
2470; CHECK-NEXT:    .cfi_def_cfa_offset 32
2471; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2472; CHECK-NEXT:    callq log
2473; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2474; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2475; CHECK-NEXT:    callq log
2476; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
2477; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2478; CHECK-NEXT:    callq log
2479; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
2480; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
2481; CHECK-NEXT:    wait
2482; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
2483; CHECK-NEXT:    # xmm0 = mem[0],zero
2484; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
2485; CHECK-NEXT:    # xmm1 = mem[0],zero
2486; CHECK-NEXT:    addq $24, %rsp
2487; CHECK-NEXT:    .cfi_def_cfa_offset 8
2488; CHECK-NEXT:    retq
2489;
2490; AVX-LABEL: constrained_vector_log_v3f64:
2491; AVX:       # %bb.0: # %entry
2492; AVX-NEXT:    subq $56, %rsp
2493; AVX-NEXT:    .cfi_def_cfa_offset 64
2494; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2495; AVX-NEXT:    callq log
2496; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2497; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2498; AVX-NEXT:    callq log
2499; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2500; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2501; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
2502; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2503; AVX-NEXT:    vzeroupper
2504; AVX-NEXT:    callq log
2505; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
2506; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
2507; AVX-NEXT:    addq $56, %rsp
2508; AVX-NEXT:    .cfi_def_cfa_offset 8
2509; AVX-NEXT:    retq
; Strict log on <3 x double>: three log libcalls. SSE returns the third
; element on the x87 stack (fldl + wait); AVX assembles a ymm result with
; vinsertf128 and emits vzeroupper before the last call.
2510entry:
2511  %log = call <3 x double> @llvm.experimental.constrained.log.v3f64(
2512                          <3 x double> <double 42.0, double 42.1, double 42.2>,
2513                          metadata !"round.dynamic",
2514                          metadata !"fpexcept.strict") #0
2515  ret <3 x double> %log
2516}
2517
2518define <4 x double> @constrained_vector_log_v4f64() #0 {
2519; CHECK-LABEL: constrained_vector_log_v4f64:
2520; CHECK:       # %bb.0: # %entry
2521; CHECK-NEXT:    subq $40, %rsp
2522; CHECK-NEXT:    .cfi_def_cfa_offset 48
2523; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2524; CHECK-NEXT:    callq log
2525; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2526; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2527; CHECK-NEXT:    callq log
2528; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2529; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
2530; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2531; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2532; CHECK-NEXT:    callq log
2533; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2534; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2535; CHECK-NEXT:    callq log
2536; CHECK-NEXT:    movaps %xmm0, %xmm1
2537; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2538; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
2539; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
2540; CHECK-NEXT:    addq $40, %rsp
2541; CHECK-NEXT:    .cfi_def_cfa_offset 8
2542; CHECK-NEXT:    retq
2543;
2544; AVX-LABEL: constrained_vector_log_v4f64:
2545; AVX:       # %bb.0: # %entry
2546; AVX-NEXT:    subq $40, %rsp
2547; AVX-NEXT:    .cfi_def_cfa_offset 48
2548; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2549; AVX-NEXT:    callq log
2550; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2551; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2552; AVX-NEXT:    callq log
2553; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2554; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2555; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2556; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2557; AVX-NEXT:    callq log
2558; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2559; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2560; AVX-NEXT:    callq log
2561; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
2562; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2563; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
2564; AVX-NEXT:    addq $40, %rsp
2565; AVX-NEXT:    .cfi_def_cfa_offset 8
2566; AVX-NEXT:    retq
; Strict log on <4 x double>: four log libcalls; SSE yields two xmm halves
; merged via unpcklpd, AVX merges into one ymm with vinsertf128.
2567entry:
2568  %log = call <4 x double> @llvm.experimental.constrained.log.v4f64(
2569                             <4 x double> <double 42.0, double 42.1,
2570                                           double 42.2, double 42.3>,
2571                             metadata !"round.dynamic",
2572                             metadata !"fpexcept.strict") #0
2573  ret <4 x double> %log
2574}
2575
2576define <1 x float> @constrained_vector_log10_v1f32() #0 {
2577; CHECK-LABEL: constrained_vector_log10_v1f32:
2578; CHECK:       # %bb.0: # %entry
2579; CHECK-NEXT:    pushq %rax
2580; CHECK-NEXT:    .cfi_def_cfa_offset 16
2581; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2582; CHECK-NEXT:    callq log10f
2583; CHECK-NEXT:    popq %rax
2584; CHECK-NEXT:    .cfi_def_cfa_offset 8
2585; CHECK-NEXT:    retq
2586;
2587; AVX-LABEL: constrained_vector_log10_v1f32:
2588; AVX:       # %bb.0: # %entry
2589; AVX-NEXT:    pushq %rax
2590; AVX-NEXT:    .cfi_def_cfa_offset 16
2591; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2592; AVX-NEXT:    callq log10f
2593; AVX-NEXT:    popq %rax
2594; AVX-NEXT:    .cfi_def_cfa_offset 8
2595; AVX-NEXT:    retq
; Strict log10 on a <1 x float> operand: lowered to a single log10f libcall
; per the CHECK lines above.
2596entry:
2597  %log10 = call <1 x float> @llvm.experimental.constrained.log10.v1f32(
2598                             <1 x float> <float 42.0>,
2599                             metadata !"round.dynamic",
2600                             metadata !"fpexcept.strict") #0
2601  ret <1 x float> %log10
2602}
2603
2604define <2 x double> @constrained_vector_log10_v2f64() #0 {
2605; CHECK-LABEL: constrained_vector_log10_v2f64:
2606; CHECK:       # %bb.0: # %entry
2607; CHECK-NEXT:    subq $24, %rsp
2608; CHECK-NEXT:    .cfi_def_cfa_offset 32
2609; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2610; CHECK-NEXT:    callq log10
2611; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2612; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2613; CHECK-NEXT:    callq log10
2614; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2615; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
2616; CHECK-NEXT:    addq $24, %rsp
2617; CHECK-NEXT:    .cfi_def_cfa_offset 8
2618; CHECK-NEXT:    retq
2619;
2620; AVX-LABEL: constrained_vector_log10_v2f64:
2621; AVX:       # %bb.0: # %entry
2622; AVX-NEXT:    subq $24, %rsp
2623; AVX-NEXT:    .cfi_def_cfa_offset 32
2624; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2625; AVX-NEXT:    callq log10
2626; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2627; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2628; AVX-NEXT:    callq log10
2629; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2630; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2631; AVX-NEXT:    addq $24, %rsp
2632; AVX-NEXT:    .cfi_def_cfa_offset 8
2633; AVX-NEXT:    retq
; Strict log10 on <2 x double>: scalarized into two log10 libcalls, lanes
; recombined via unpcklpd (see CHECK lines above).
2634entry:
2635  %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64(
2636                               <2 x double> <double 42.0, double 42.1>,
2637                               metadata !"round.dynamic",
2638                               metadata !"fpexcept.strict") #0
2639  ret <2 x double> %log10
2640}
2641
2642define <3 x float> @constrained_vector_log10_v3f32() #0 {
2643; CHECK-LABEL: constrained_vector_log10_v3f32:
2644; CHECK:       # %bb.0: # %entry
2645; CHECK-NEXT:    subq $40, %rsp
2646; CHECK-NEXT:    .cfi_def_cfa_offset 48
2647; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2648; CHECK-NEXT:    callq log10f
2649; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2650; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2651; CHECK-NEXT:    callq log10f
2652; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2653; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2654; CHECK-NEXT:    callq log10f
2655; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
2656; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2657; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2658; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
2659; CHECK-NEXT:    movaps %xmm1, %xmm0
2660; CHECK-NEXT:    addq $40, %rsp
2661; CHECK-NEXT:    .cfi_def_cfa_offset 8
2662; CHECK-NEXT:    retq
2663;
2664; AVX-LABEL: constrained_vector_log10_v3f32:
2665; AVX:       # %bb.0: # %entry
2666; AVX-NEXT:    subq $40, %rsp
2667; AVX-NEXT:    .cfi_def_cfa_offset 48
2668; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2669; AVX-NEXT:    callq log10f
2670; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2671; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2672; AVX-NEXT:    callq log10f
2673; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2674; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2675; AVX-NEXT:    callq log10f
2676; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
2677; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
2678; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
2679; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
2680; AVX-NEXT:    addq $40, %rsp
2681; AVX-NEXT:    .cfi_def_cfa_offset 8
2682; AVX-NEXT:    retq
; Odd-width case: strict log10 on <3 x float> scalarizes to three log10f
; libcalls; lanes rebuilt with unpcklps/unpcklpd (SSE) or vinsertps (AVX).
2683entry:
2684  %log10 = call <3 x float> @llvm.experimental.constrained.log10.v3f32(
2685                              <3 x float> <float 42.0, float 43.0, float 44.0>,
2686                              metadata !"round.dynamic",
2687                              metadata !"fpexcept.strict") #0
2688  ret <3 x float> %log10
2689}
2690
2691define <3 x double> @constrained_vector_log10_v3f64() #0 {
2692; CHECK-LABEL: constrained_vector_log10_v3f64:
2693; CHECK:       # %bb.0: # %entry
2694; CHECK-NEXT:    subq $24, %rsp
2695; CHECK-NEXT:    .cfi_def_cfa_offset 32
2696; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2697; CHECK-NEXT:    callq log10
2698; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2699; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2700; CHECK-NEXT:    callq log10
2701; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
2702; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2703; CHECK-NEXT:    callq log10
2704; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
2705; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
2706; CHECK-NEXT:    wait
2707; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
2708; CHECK-NEXT:    # xmm0 = mem[0],zero
2709; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
2710; CHECK-NEXT:    # xmm1 = mem[0],zero
2711; CHECK-NEXT:    addq $24, %rsp
2712; CHECK-NEXT:    .cfi_def_cfa_offset 8
2713; CHECK-NEXT:    retq
2714;
2715; AVX-LABEL: constrained_vector_log10_v3f64:
2716; AVX:       # %bb.0: # %entry
2717; AVX-NEXT:    subq $56, %rsp
2718; AVX-NEXT:    .cfi_def_cfa_offset 64
2719; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2720; AVX-NEXT:    callq log10
2721; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2722; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2723; AVX-NEXT:    callq log10
2724; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2725; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2726; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
2727; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2728; AVX-NEXT:    vzeroupper
2729; AVX-NEXT:    callq log10
2730; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
2731; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
2732; AVX-NEXT:    addq $56, %rsp
2733; AVX-NEXT:    .cfi_def_cfa_offset 8
2734; AVX-NEXT:    retq
; Strict log10 on <3 x double>: three log10 libcalls. SSE returns the third
; element on the x87 stack (fldl + wait); AVX assembles a ymm result with
; vinsertf128 and emits vzeroupper before the last call.
2735entry:
2736  %log10 = call <3 x double> @llvm.experimental.constrained.log10.v3f64(
2737                          <3 x double> <double 42.0, double 42.1, double 42.2>,
2738                          metadata !"round.dynamic",
2739                          metadata !"fpexcept.strict") #0
2740  ret <3 x double> %log10
2741}
2742
2743define <4 x double> @constrained_vector_log10_v4f64() #0 {
2744; CHECK-LABEL: constrained_vector_log10_v4f64:
2745; CHECK:       # %bb.0: # %entry
2746; CHECK-NEXT:    subq $40, %rsp
2747; CHECK-NEXT:    .cfi_def_cfa_offset 48
2748; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2749; CHECK-NEXT:    callq log10
2750; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2751; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2752; CHECK-NEXT:    callq log10
2753; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2754; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
2755; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2756; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2757; CHECK-NEXT:    callq log10
2758; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2759; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2760; CHECK-NEXT:    callq log10
2761; CHECK-NEXT:    movaps %xmm0, %xmm1
2762; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2763; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
2764; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
2765; CHECK-NEXT:    addq $40, %rsp
2766; CHECK-NEXT:    .cfi_def_cfa_offset 8
2767; CHECK-NEXT:    retq
2768;
2769; AVX-LABEL: constrained_vector_log10_v4f64:
2770; AVX:       # %bb.0: # %entry
2771; AVX-NEXT:    subq $40, %rsp
2772; AVX-NEXT:    .cfi_def_cfa_offset 48
2773; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2774; AVX-NEXT:    callq log10
2775; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2776; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2777; AVX-NEXT:    callq log10
2778; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2779; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2780; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2781; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2782; AVX-NEXT:    callq log10
2783; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2784; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2785; AVX-NEXT:    callq log10
2786; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
2787; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2788; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
2789; AVX-NEXT:    addq $40, %rsp
2790; AVX-NEXT:    .cfi_def_cfa_offset 8
2791; AVX-NEXT:    retq
; Strict log10 on <4 x double>: four log10 libcalls; SSE yields two xmm
; halves merged via unpcklpd, AVX merges into one ymm with vinsertf128.
2792entry:
2793  %log10 = call <4 x double> @llvm.experimental.constrained.log10.v4f64(
2794                               <4 x double> <double 42.0, double 42.1,
2795                                             double 42.2, double 42.3>,
2796                               metadata !"round.dynamic",
2797                               metadata !"fpexcept.strict") #0
2798  ret <4 x double> %log10
2799}
2800
2801define <1 x float> @constrained_vector_log2_v1f32() #0 {
2802; CHECK-LABEL: constrained_vector_log2_v1f32:
2803; CHECK:       # %bb.0: # %entry
2804; CHECK-NEXT:    pushq %rax
2805; CHECK-NEXT:    .cfi_def_cfa_offset 16
2806; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2807; CHECK-NEXT:    callq log2f
2808; CHECK-NEXT:    popq %rax
2809; CHECK-NEXT:    .cfi_def_cfa_offset 8
2810; CHECK-NEXT:    retq
2811;
2812; AVX-LABEL: constrained_vector_log2_v1f32:
2813; AVX:       # %bb.0: # %entry
2814; AVX-NEXT:    pushq %rax
2815; AVX-NEXT:    .cfi_def_cfa_offset 16
2816; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2817; AVX-NEXT:    callq log2f
2818; AVX-NEXT:    popq %rax
2819; AVX-NEXT:    .cfi_def_cfa_offset 8
2820; AVX-NEXT:    retq
; Strict log2 on a <1 x float> operand: lowered to a single log2f libcall
; per the CHECK lines above.
2821entry:
2822  %log2 = call <1 x float> @llvm.experimental.constrained.log2.v1f32(
2823                             <1 x float> <float 42.0>,
2824                             metadata !"round.dynamic",
2825                             metadata !"fpexcept.strict") #0
2826  ret <1 x float> %log2
2827}
2828
2829define <2 x double> @constrained_vector_log2_v2f64() #0 {
2830; CHECK-LABEL: constrained_vector_log2_v2f64:
2831; CHECK:       # %bb.0: # %entry
2832; CHECK-NEXT:    subq $24, %rsp
2833; CHECK-NEXT:    .cfi_def_cfa_offset 32
2834; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2835; CHECK-NEXT:    callq log2
2836; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2837; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2838; CHECK-NEXT:    callq log2
2839; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2840; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
2841; CHECK-NEXT:    addq $24, %rsp
2842; CHECK-NEXT:    .cfi_def_cfa_offset 8
2843; CHECK-NEXT:    retq
2844;
2845; AVX-LABEL: constrained_vector_log2_v2f64:
2846; AVX:       # %bb.0: # %entry
2847; AVX-NEXT:    subq $24, %rsp
2848; AVX-NEXT:    .cfi_def_cfa_offset 32
2849; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2850; AVX-NEXT:    callq log2
2851; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2852; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2853; AVX-NEXT:    callq log2
2854; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2855; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2856; AVX-NEXT:    addq $24, %rsp
2857; AVX-NEXT:    .cfi_def_cfa_offset 8
2858; AVX-NEXT:    retq
; Strict log2 on <2 x double>: scalarized into two log2 libcalls, lanes
; recombined via unpcklpd (see CHECK lines above).
2859entry:
2860  %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64(
2861                              <2 x double> <double 42.0, double 42.1>,
2862                              metadata !"round.dynamic",
2863                              metadata !"fpexcept.strict") #0
2864  ret <2 x double> %log2
2865}
2866
; Strict-FP log2 of a <3 x float> constant: scalarized into three log2f
; libcalls, then reassembled (unpcklps/unpcklpd pre-AVX, vinsertps with AVX).
define <3 x float> @constrained_vector_log2_v3f32() #0 {
; CHECK-LABEL: constrained_vector_log2_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq log2f
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq log2f
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq log2f
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log2_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq log2f
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq log2f
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq log2f
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log2 = call <3 x float> @llvm.experimental.constrained.log2.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %log2
}
2915
; Strict-FP log2 of a <3 x double> constant: three log2 libcalls. Pre-AVX the
; third element is returned on the x87 stack (fldl followed by wait); with AVX
; the three results are packed into a ymm via vinsertf128.
define <3 x double> @constrained_vector_log2_v3f64() #0 {
; CHECK-LABEL: constrained_vector_log2_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log2_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log2
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log2
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq log2
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log2 = call <3 x double> @llvm.experimental.constrained.log2.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %log2
}
2967
; Strict-FP log2 of a <4 x double> constant: four log2 libcalls. Pre-AVX the
; result is two xmm halves built with unpcklpd; AVX joins them with vinsertf128.
define <4 x double> @constrained_vector_log2_v4f64() #0 {
; CHECK-LABEL: constrained_vector_log2_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq log2
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log2_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log2
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log2
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log2
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq log2
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log2 = call <4 x double> @llvm.experimental.constrained.log2.v4f64(
                              <4 x double> <double 42.0, double 42.1,
                                            double 42.2, double 42.3>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <4 x double> %log2
}
3025
; Strict-FP rint on <1 x float>. Without SSE4.1 this is a rintf libcall; with
; AVX it folds to a single vroundss with imm 4 (round using the current MXCSR
; mode, inexact exception not suppressed — matching rint semantics).
define <1 x float> @constrained_vector_rint_v1f32() #0 {
; CHECK-LABEL: constrained_vector_rint_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq rintf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_rint_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %rint
}
3049
; Strict-FP rint on <2 x double>: two rint libcalls pre-SSE4.1; AVX folds the
; whole vector into one vroundpd $4 with a memory operand.
define <2 x double> @constrained_vector_rint_v2f64() #0 {
; CHECK-LABEL: constrained_vector_rint_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_rint_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vroundpd $4, {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
                        <2 x double> <double 42.1, double 42.0>,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret <2 x double> %rint
}
3077
; Strict-FP rint on <3 x float>: three rintf libcalls pre-SSE4.1; with AVX each
; lane gets a vroundss $4 and the lanes are reassembled with vinsertps.
; Fix: the 'entry:' label had a stray leading space, unlike every sibling test
; and the layout update_llc_test_checks.py emits; normalized to column 0.
define <3 x float> @constrained_vector_rint_v3f32() #0 {
; CHECK-LABEL: constrained_vector_rint_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq rintf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq rintf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq rintf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_rint_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $4, %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $4, %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
entry:
  %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %rint
}
3118
; Strict-FP rint on <3 x double>. Pre-SSE4.1: three rint libcalls with the
; third element returned via the x87 stack (fldl + wait). AVX: one scalar
; vroundsd plus a vroundpd pair, merged with vinsertf128.
define <3 x double> @constrained_vector_rint_v3f64() #0 {
; CHECK-LABEL: constrained_vector_rint_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_rint_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vroundsd $4, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vroundpd $4, {{.*}}(%rip), %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %rint
}
3157
; Strict-FP rint on <4 x double>: four rint libcalls pre-SSE4.1; AVX collapses
; the whole vector into a single 256-bit vroundpd $4.
define <4 x double> @constrained_vector_rint_v4f64() #0 {
; CHECK-LABEL: constrained_vector_rint_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq rint
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_rint_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vroundpd $4, {{.*}}(%rip), %ymm0
; AVX-NEXT:    retq
entry:
  %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
                        <4 x double> <double 42.1, double 42.2,
                                      double 42.3, double 42.4>,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret <4 x double> %rint
}
3196
; Strict-FP nearbyint on <1 x float>. Libcall without SSE4.1; with AVX it is a
; vroundss with imm 12 (current rounding mode, inexact suppressed — the only
; difference from the rint tests, which use imm 4).
define <1 x float> @constrained_vector_nearbyint_v1f32() #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq nearbyintf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_nearbyint_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
                               <1 x float> <float 42.0>,
                               metadata !"round.dynamic",
                               metadata !"fpexcept.strict") #0
  ret <1 x float> %nearby
}
3220
; Strict-FP nearbyint on <2 x double>: two nearbyint libcalls pre-SSE4.1; AVX
; folds to one vroundpd $12 with a memory operand.
define <2 x double> @constrained_vector_nearbyint_v2f64() #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_nearbyint_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vroundpd $12, {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
                                <2 x double> <double 42.1, double 42.0>,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <2 x double> %nearby
}
3248
; Strict-FP nearbyint on <3 x float>: three nearbyintf libcalls pre-SSE4.1;
; AVX rounds each lane with vroundss $12 and reassembles with vinsertps.
define <3 x float> @constrained_vector_nearbyint_v3f32() #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq nearbyintf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq nearbyintf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq nearbyintf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_nearbyint_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $12, %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $12, %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
entry:
  %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %nearby
}
3289
; Strict-FP nearbyint on <3 x double>; pre-SSE4.1 the third result travels via
; the x87 stack (fldl + wait), AVX uses vroundsd/vroundpd $12 + vinsertf128.
; NOTE(review): name drops the "int" suffix its siblings use
; (constrained_vector_nearbyint_*) — looks accidental, but renaming would also
; require regenerating the label check lines; confirm before changing.
define <3 x double> @constrained_vector_nearby_v3f64() #0 {
; CHECK-LABEL: constrained_vector_nearby_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_nearby_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vroundsd $12, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vroundpd $12, {{.*}}(%rip), %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %nearby
}
3328
; Strict-FP nearbyint on <4 x double>: four nearbyint libcalls pre-SSE4.1;
; AVX collapses the whole vector into a single 256-bit vroundpd $12.
define <4 x double> @constrained_vector_nearbyint_v4f64() #0 {
; CHECK-LABEL: constrained_vector_nearbyint_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq nearbyint
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_nearbyint_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vroundpd $12, {{.*}}(%rip), %ymm0
; AVX-NEXT:    retq
entry:
  %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
                                <4 x double> <double 42.1, double 42.2,
                                              double 42.3, double 42.4>,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <4 x double> %nearby
}
3367
; Strict-FP maxnum on <1 x float>. Both configurations emit an fmaxf libcall —
; no max instruction appears even with AVX (presumably because the hardware
; max does not implement fmax NaN semantics — confirm against the lowering).
define <1 x float> @constrained_vector_maxnum_v1f32() #0 {
; CHECK-LABEL: constrained_vector_maxnum_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmaxf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_maxnum_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fmaxf
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %max = call <1 x float> @llvm.experimental.constrained.maxnum.v1f32(
                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
                               metadata !"fpexcept.strict") #0
  ret <1 x float> %max
}
3396
; Strict-FP maxnum on <2 x double>: scalarized into two fmax libcalls in both
; configurations, results repacked with unpcklpd.
define <2 x double> @constrained_vector_maxnum_v2f64() #0 {
; CHECK-LABEL: constrained_vector_maxnum_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmax
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmax
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_maxnum_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmax
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmax
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
                                <2 x double> <double 43.0, double 42.0>,
                                <2 x double> <double 41.0, double 40.0>,
                                metadata !"fpexcept.strict") #0
  ret <2 x double> %max
}
3438
; Strict-FP maxnum on <3 x float>: three fmaxf libcalls in both configurations,
; lanes reassembled (unpcklps/unpcklpd pre-AVX, vinsertps with AVX).
define <3 x float> @constrained_vector_maxnum_v3f32() #0 {
; CHECK-LABEL: constrained_vector_maxnum_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmaxf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmaxf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fmaxf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_maxnum_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fmaxf
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fmaxf
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fmaxf
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %max = call <3 x float> @llvm.experimental.constrained.maxnum.v3f32(
                              <3 x float> <float 43.0, float 44.0, float 45.0>,
                              <3 x float> <float 41.0, float 42.0, float 43.0>,
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %max
}
3493
; Strict-FP maxnum on <3 x double>: three fmax libcalls; pre-AVX the third
; result returns via the x87 stack (fldl + wait), AVX packs a ymm with
; vinsertf128 (note the vzeroupper before the SSE-ABI libcall).
; NOTE(review): name is "max" while siblings use "maxnum" — looks accidental,
; but renaming also requires regenerating the label check lines; confirm first.
define <3 x double> @constrained_vector_max_v3f64() #0 {
; CHECK-LABEL: constrained_vector_max_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmax
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmax
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmax
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_max_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmax
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmax
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq fmax
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %max = call <3 x double> @llvm.experimental.constrained.maxnum.v3f64(
                          <3 x double> <double 43.0, double 44.0, double 45.0>,
                          <3 x double> <double 40.0, double 41.0, double 42.0>,
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %max
}
3551
; Strict-FP maxnum on <4 x double> constant operands. There is no vector fmax
; lowering here: the intrinsic is scalarized into four fmax libcalls, and the
; lanes are reassembled with unpcklpd (plus vinsertf128 for the AVX ymm result).
define <4 x double> @constrained_vector_maxnum_v4f64() #0 {
; CHECK-LABEL: constrained_vector_maxnum_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmax
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmax
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmax
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmax
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_maxnum_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmax
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmax
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmax
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmax
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64(
                                <4 x double> <double 44.0, double 45.0,
                                              double 46.0, double 47.0>,
                                <4 x double> <double 40.0, double 41.0,
                                              double 42.0, double 43.0>,
                                metadata !"fpexcept.strict") #0
  ret <4 x double> %max
}
3618
; Strict-FP minnum on a <1 x float> constant pair lowers to a single fminf
; libcall on both SSE and AVX (no vector min instruction is selected).
; Fix: the `entry:` label had a stray leading space, unlike every other
; function in this file; labels are kept at column 0 for consistency.
define <1 x float> @constrained_vector_minnum_v1f32() #0 {
; CHECK-LABEL: constrained_vector_minnum_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fminf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_minnum_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fminf
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %min = call <1 x float> @llvm.experimental.constrained.minnum.v1f32(
                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
                               metadata !"fpexcept.strict") #0
  ret <1 x float> %min
}
3647
; Strict-FP minnum on <2 x double> constants: scalarized into two fmin
; libcalls, results merged back into one xmm with unpcklpd.
define <2 x double> @constrained_vector_minnum_v2f64() #0 {
; CHECK-LABEL: constrained_vector_minnum_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmin
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmin
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_minnum_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmin
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmin
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
                                <2 x double> <double 43.0, double 42.0>,
                                <2 x double> <double 41.0, double 40.0>,
                                metadata !"fpexcept.strict") #0
  ret <2 x double> %min
}
3689
; Strict-FP minnum on <3 x float> constants: three fminf libcalls; the SSE
; path rebuilds the vector with unpcklps/unpcklpd, the AVX path with vinsertps.
define <3 x float> @constrained_vector_minnum_v3f32() #0 {
; CHECK-LABEL: constrained_vector_minnum_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fminf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fminf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq fminf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_minnum_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fminf
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fminf
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fminf
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %min = call <3 x float> @llvm.experimental.constrained.minnum.v3f32(
                              <3 x float> <float 43.0, float 44.0, float 45.0>,
                              <3 x float> <float 41.0, float 42.0, float 43.0>,
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %min
}
3744
; Strict-FP minnum on <3 x double> constants: three fmin libcalls. On the SSE
; path the third element is returned via the x87 stack (fldl) with the first
; two in xmm0/xmm1; the AVX path builds a ymm with vinsertf128 (and needs
; vzeroupper before the last libcall).
; NOTE(review): the name breaks the `constrained_vector_minnum_*` pattern used
; by the sibling tests (same for max_v3f64); kept as-is because the
; autogenerated CHECK-LABEL lines depend on it.
; Fix: the `%min = call` line was indented one space instead of the file-wide
; two spaces.
define <3 x double> @constrained_vector_min_v3f64() #0 {
; CHECK-LABEL: constrained_vector_min_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmin
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmin
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmin
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_min_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmin
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmin
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq fmin
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %min = call <3 x double> @llvm.experimental.constrained.minnum.v3f64(
                          <3 x double> <double 43.0, double 44.0, double 45.0>,
                          <3 x double> <double 40.0, double 41.0, double 42.0>,
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %min
}
3802
; Strict-FP minnum on <4 x double> constants: mirrors maxnum_v4f64 — four fmin
; libcalls, lanes merged with unpcklpd and (AVX) vinsertf128.
define <4 x double> @constrained_vector_minnum_v4f64() #0 {
; CHECK-LABEL: constrained_vector_minnum_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmin
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmin
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmin
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    callq fmin
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_minnum_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmin
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmin
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmin
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    callq fmin
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(
                                <4 x double> <double 44.0, double 45.0,
                                              double 46.0, double 47.0>,
                                <4 x double> <double 40.0, double 41.0,
                                              double 42.0, double 43.0>,
                                metadata !"fpexcept.strict") #0
  ret <4 x double> %min
}
3869
; Strict fptosi <1 x float> -> <1 x i32>: a single cvttss2si from a
; RIP-relative constant pool entry into %eax.
define <1 x i32> @constrained_vector_fptosi_v1i32_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v1i32_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v1i32_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
; AVX-NEXT:    retq
entry:
  %result = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f32(
                               <1 x float><float 42.0>,
                               metadata !"fpexcept.strict") #0
  ret <1 x i32> %result
}
3886
; Strict fptosi <2 x float> -> <2 x i32>: lowered as a full-width cvttps2dq
; on both SSE and AVX paths.
define <2 x i32> @constrained_vector_fptosi_v2i32_v2f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttps2dq {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttps2dq {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(
                                <2 x float><float 42.0, float 43.0>,
                                metadata !"fpexcept.strict") #0
  ret <2 x i32> %result
}
3903
; Strict fptosi <3 x float> -> <3 x i32>: the odd-width vector is scalarized
; into three cvttss2si converts; SSE rebuilds with movd/punpck, AVX with vpinsrd.
define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
; CHECK-NEXT:    movd %eax, %xmm1
; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
; CHECK-NEXT:    movd %eax, %xmm0
; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
; CHECK-NEXT:    movd %eax, %xmm1
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32(
                                <3 x float><float 42.0, float 43.0,
                                            float 44.0>,
                                metadata !"fpexcept.strict") #0
  ret <3 x i32> %result
}
3933
; Strict fptosi <4 x float> -> <4 x i32>: full-width cvttps2dq, same as the
; v2i32 case.
define <4 x i32> @constrained_vector_fptosi_v4i32_v4f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttps2dq {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttps2dq {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(
                                <4 x float><float 42.0, float 43.0,
                                            float 44.0, float 45.0>,
                                metadata !"fpexcept.strict") #0
  ret <4 x i32> %result
}
3951
; Strict fptosi <1 x float> -> <1 x i64>: one cvttss2si to the 64-bit %rax.
define <1 x i64> @constrained_vector_fptosi_v1i64_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v1i64_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v1i64_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX-NEXT:    retq
entry:
  %result = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f32(
                               <1 x float><float 42.0>,
                               metadata !"fpexcept.strict") #0
  ret <1 x i64> %result
}
3968
; Strict fptosi <2 x float> -> <2 x i64>: no packed f32->i64 convert below
; AVX512DQ, so SSE/AVX1/AVX512F scalarize into two cvttss2si + punpcklqdq;
; AVX512DQ uses the packed vcvttps2qq (widened to zmm, then truncated).
define <2 x i64> @constrained_vector_fptosi_v2i64_v2f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v2i64_v2f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm1
; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm0
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptosi_v2i64_v2f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_fptosi_v2i64_v2f32:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_fptosi_v2i64_v2f32:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vcvttps2qq {{.*}}(%rip), %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
entry:
  %result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(
                                <2 x float><float 42.0, float 43.0>,
                                metadata !"fpexcept.strict") #0
  ret <2 x i64> %result
}
4009
; Strict fptosi <3 x float> -> <3 x i64>: three scalar cvttss2si converts.
; SSE returns the three elements in %rax/%rdx/%rcx per the <3 x i64> return
; convention; AVX paths assemble a ymm via vpunpcklqdq + vinsert{f,i}128.
define <3 x i64> @constrained_vector_fptosi_v3i64_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v3i64_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rcx
; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rdx
; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptosi_v3i64_v3f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptosi_v3i64_v3f32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm0
; AVX512-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %result = call <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f32(
                                <3 x float><float 42.0, float 43.0,
                                            float 44.0>,
                                metadata !"fpexcept.strict") #0
  ret <3 x i64> %result
}
4048
; Strict fptosi <4 x float> -> <4 x i64>: scalarized into four cvttss2si
; converts everywhere except AVX512DQ, which selects the packed vcvttps2qq
; (ymm source widened to zmm, result truncated back to ymm).
define <4 x i64> @constrained_vector_fptosi_v4i64_v4f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v4i64_v4f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm1
; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm0
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm2
; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm1
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptosi_v4i64_v4f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_fptosi_v4i64_v4f32:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vcvttss2si {{.*}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm2
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_fptosi_v4i64_v4f32:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
entry:
  %result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(
                                <4 x float><float 42.0, float 43.0,
                                            float 44.0, float 45.0>,
                                metadata !"fpexcept.strict") #0
  ret <4 x i64> %result
}
4107
; Strict fptosi <1 x double> -> <1 x i32>: a single cvttsd2si into %eax.
define <1 x i32> @constrained_vector_fptosi_v1i32_v1f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v1i32_v1f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %eax
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v1i32_v1f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %eax
; AVX-NEXT:    retq
entry:
  %result = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(
                               <1 x double><double 42.1>,
                               metadata !"fpexcept.strict") #0
  ret <1 x i32> %result
}
4124
4125
; Strict fptosi <2 x double> -> <2 x i32>: packed cvttpd2dq; the AVX form uses
; the x-suffixed vcvttpd2dqx to disambiguate the 128-bit memory source.
define <2 x i32> @constrained_vector_fptosi_v2i32_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttpd2dq {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttpd2dqx {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(
                                <2 x double><double 42.1, double 42.2>,
                                metadata !"fpexcept.strict") #0
  ret <2 x i32> %result
}
4142
; Strict fptosi <3 x double> -> <3 x i32>: scalarized into three cvttsd2si
; converts; SSE rebuilds with movd/punpck, AVX with vpinsrd.
define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %eax
; CHECK-NEXT:    movd %eax, %xmm1
; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %eax
; CHECK-NEXT:    movd %eax, %xmm0
; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %eax
; CHECK-NEXT:    movd %eax, %xmm1
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %eax
; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %eax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64(
                                <3 x double><double 42.1, double 42.2,
                                             double 42.3>,
                                metadata !"fpexcept.strict") #0
  ret <3 x i32> %result
}
4172
; Strict fptosi <4 x double> -> <4 x i32>: SSE does two 128-bit cvttpd2dq
; halves merged with unpcklpd; AVX uses a single 256-bit vcvttpd2dqy.
define <4 x i32> @constrained_vector_fptosi_v4i32_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttpd2dq {{.*}}(%rip), %xmm1
; CHECK-NEXT:    cvttpd2dq {{.*}}(%rip), %xmm0
; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttpd2dqy {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(
                                <4 x double><double 42.1, double 42.2,
                                             double 42.3, double 42.4>,
                                metadata !"fpexcept.strict") #0
  ret <4 x i32> %result
}
4192
; Strict fptosi <1 x double> -> <1 x i64>: one cvttsd2si to the 64-bit %rax.
define <1 x i64> @constrained_vector_fptosi_v1i64_v1f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v1i64_v1f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v1i64_v1f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
; AVX-NEXT:    retq
entry:
  %result = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(
                               <1 x double><double 42.1>,
                               metadata !"fpexcept.strict") #0
  ret <1 x i64> %result
}
4209
; Strict fptosi <2 x double> -> <2 x i64>: scalarized into two cvttsd2si +
; punpcklqdq below AVX512DQ; AVX512DQ uses the packed vcvttpd2qq (widened to
; zmm, then truncated back to xmm).
define <2 x i64> @constrained_vector_fptosi_v2i64_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v2i64_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm1
; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm0
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptosi_v2i64_v2f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_fptosi_v2i64_v2f64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_fptosi_v2i64_v2f64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps {{.*#+}} xmm0 = [4.2100000000000001E+1,4.2200000000000003E+1]
; AVX512DQ-NEXT:    vcvttpd2qq %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
entry:
  %result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(
                                <2 x double><double 42.1, double 42.2>,
                                metadata !"fpexcept.strict") #0
  ret <2 x i64> %result
}
4251
; Strict fptosi <3 x double> -> <3 x i64>: three scalar cvttsd2si converts.
; SSE returns the elements in %rax/%rdx/%rcx per the <3 x i64> return
; convention; AVX paths assemble a ymm via vpunpcklqdq + vinsert{f,i}128.
define <3 x i64> @constrained_vector_fptosi_v3i64_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v3i64_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rcx
; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rdx
; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptosi_v3i64_v3f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptosi_v3i64_v3f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm0
; AVX512-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %result = call <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f64(
                                <3 x double><double 42.1, double 42.2,
                                             double 42.3>,
                                metadata !"fpexcept.strict") #0
  ret <3 x i64> %result
}
4290
4291define <4 x i64> @constrained_vector_fptosi_v4i64_v4f64() #0 {
; Strict fp-to-signed of constant <4 x double>. Targets lacking a packed
; double->qword convert scalarize into four cvttsd2si and rebuild the
; vector; the DQ path folds the whole thing into one vcvttpd2qq.
4292; CHECK-LABEL: constrained_vector_fptosi_v4i64_v4f64:
4293; CHECK:       # %bb.0: # %entry
4294; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
4295; CHECK-NEXT:    movq %rax, %xmm1
4296; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
4297; CHECK-NEXT:    movq %rax, %xmm0
4298; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4299; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
4300; CHECK-NEXT:    movq %rax, %xmm2
4301; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
4302; CHECK-NEXT:    movq %rax, %xmm1
4303; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
4304; CHECK-NEXT:    retq
4305;
4306; AVX1-LABEL: constrained_vector_fptosi_v4i64_v4f64:
4307; AVX1:       # %bb.0: # %entry
4308; AVX1-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
4309; AVX1-NEXT:    vmovq %rax, %xmm0
4310; AVX1-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
4311; AVX1-NEXT:    vmovq %rax, %xmm1
4312; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
4313; AVX1-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
4314; AVX1-NEXT:    vmovq %rax, %xmm1
4315; AVX1-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
4316; AVX1-NEXT:    vmovq %rax, %xmm2
4317; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
4318; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4319; AVX1-NEXT:    retq
4320;
4321; AVX512F-LABEL: constrained_vector_fptosi_v4i64_v4f64:
4322; AVX512F:       # %bb.0: # %entry
4323; AVX512F-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
4324; AVX512F-NEXT:    vmovq %rax, %xmm0
4325; AVX512F-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
4326; AVX512F-NEXT:    vmovq %rax, %xmm1
4327; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
4328; AVX512F-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
4329; AVX512F-NEXT:    vmovq %rax, %xmm1
4330; AVX512F-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
4331; AVX512F-NEXT:    vmovq %rax, %xmm2
4332; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
4333; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
4334; AVX512F-NEXT:    retq
4335;
4336; AVX512DQ-LABEL: constrained_vector_fptosi_v4i64_v4f64:
4337; AVX512DQ:       # %bb.0: # %entry
4338; AVX512DQ-NEXT:    vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1]
4339; AVX512DQ-NEXT:    vcvttpd2qq %zmm0, %zmm0
4340; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
4341; AVX512DQ-NEXT:    retq
4342entry:
4343  %result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(
4344                                <4 x double><double 42.1, double 42.2,
4345                                             double 42.3, double 42.4>,
4346                                metadata !"fpexcept.strict") #0
4347  ret <4 x i64> %result
4348}
4349
4350define <1 x i32> @constrained_vector_fptoui_v1i32_v1f32() #0 {
; Strict fp-to-unsigned-i32 of a single float. Pre-AVX512 runs convert
; through the wider signed cvttss2si to %rax and truncate to %eax;
; AVX512 has a native unsigned scalar convert (vcvttss2usi).
4351; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f32:
4352; CHECK:       # %bb.0: # %entry
4353; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
4354; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
4355; CHECK-NEXT:    retq
4356;
4357; AVX1-LABEL: constrained_vector_fptoui_v1i32_v1f32:
4358; AVX1:       # %bb.0: # %entry
4359; AVX1-NEXT:    vcvttss2si {{.*}}(%rip), %rax
4360; AVX1-NEXT:    # kill: def $eax killed $eax killed $rax
4361; AVX1-NEXT:    retq
4362;
4363; AVX512-LABEL: constrained_vector_fptoui_v1i32_v1f32:
4364; AVX512:       # %bb.0: # %entry
4365; AVX512-NEXT:    vcvttss2usi {{.*}}(%rip), %eax
4366; AVX512-NEXT:    retq
4367entry:
4368  %result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f32(
4369                               <1 x float><float 42.0>,
4370                               metadata !"fpexcept.strict") #0
4371  ret <1 x i32> %result
4372}
4373
4374define <2 x i32> @constrained_vector_fptoui_v2i32_v2f32() #0 {
; Strict fp-to-unsigned <2 x i32>: scalarized 64-bit signed converts plus
; reassembly on SSE/AVX1; AVX512 widens to zmm and uses vcvttps2udq, then
; drops the upper lanes.
4375; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f32:
4376; CHECK:       # %bb.0: # %entry
4377; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
4378; CHECK-NEXT:    movd %eax, %xmm1
4379; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
4380; CHECK-NEXT:    movd %eax, %xmm0
4381; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
4382; CHECK-NEXT:    retq
4383;
4384; AVX1-LABEL: constrained_vector_fptoui_v2i32_v2f32:
4385; AVX1:       # %bb.0: # %entry
4386; AVX1-NEXT:    vcvttss2si {{.*}}(%rip), %rax
4387; AVX1-NEXT:    vcvttss2si {{.*}}(%rip), %rcx
4388; AVX1-NEXT:    vmovd %ecx, %xmm0
4389; AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
4390; AVX1-NEXT:    retq
4391;
4392; AVX512-LABEL: constrained_vector_fptoui_v2i32_v2f32:
4393; AVX512:       # %bb.0: # %entry
4394; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,0.0E+0,0.0E+0]
4395; AVX512-NEXT:    vcvttps2udq %zmm0, %zmm0
4396; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
4397; AVX512-NEXT:    vzeroupper
4398; AVX512-NEXT:    retq
4399entry:
4400  %result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(
4401                                <2 x float><float 42.0, float 43.0>,
4402                                metadata !"fpexcept.strict") #0
4403  ret <2 x i32> %result
4404}
4405
4406define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32() #0 {
; Strict fp-to-unsigned of the odd-width <3 x float>: every run line
; scalarizes (signed cvttss2si pre-AVX512, unsigned vcvttss2usi on
; AVX512) and inserts the three elements one at a time.
4407; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f32:
4408; CHECK:       # %bb.0: # %entry
4409; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
4410; CHECK-NEXT:    movd %eax, %xmm1
4411; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
4412; CHECK-NEXT:    movd %eax, %xmm0
4413; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
4414; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
4415; CHECK-NEXT:    movd %eax, %xmm1
4416; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4417; CHECK-NEXT:    retq
4418;
4419; AVX1-LABEL: constrained_vector_fptoui_v3i32_v3f32:
4420; AVX1:       # %bb.0: # %entry
4421; AVX1-NEXT:    vcvttss2si {{.*}}(%rip), %rax
4422; AVX1-NEXT:    vcvttss2si {{.*}}(%rip), %rcx
4423; AVX1-NEXT:    vmovd %ecx, %xmm0
4424; AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
4425; AVX1-NEXT:    vcvttss2si {{.*}}(%rip), %rax
4426; AVX1-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
4427; AVX1-NEXT:    retq
4428;
4429; AVX512-LABEL: constrained_vector_fptoui_v3i32_v3f32:
4430; AVX512:       # %bb.0: # %entry
4431; AVX512-NEXT:    vcvttss2usi {{.*}}(%rip), %eax
4432; AVX512-NEXT:    vmovd %eax, %xmm0
4433; AVX512-NEXT:    vcvttss2usi {{.*}}(%rip), %eax
4434; AVX512-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
4435; AVX512-NEXT:    vcvttss2usi {{.*}}(%rip), %eax
4436; AVX512-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
4437; AVX512-NEXT:    retq
4438entry:
4439  %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32(
4440                                <3 x float><float 42.0, float 43.0,
4441                                            float 44.0>,
4442                                metadata !"fpexcept.strict") #0
4443  ret <3 x i32> %result
4444}
4445
4446define <4 x i32> @constrained_vector_fptoui_v4i32_v4f32() #0 {
; Strict fp-to-unsigned <4 x i32>. Without a native packed unsigned
; convert, the SSE/AVX1 paths compare against 2^31 (2.14748365E+9),
; subtract the bias from large lanes, convert signed with cvttps2dq,
; and XOR the 0x80000000 sign bit back in. AVX512 uses vcvttps2udq.
4447; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f32:
4448; CHECK:       # %bb.0: # %entry
4449; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
4450; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
4451; CHECK-NEXT:    movaps %xmm1, %xmm2
4452; CHECK-NEXT:    cmpltps %xmm0, %xmm2
4453; CHECK-NEXT:    movaps %xmm2, %xmm3
4454; CHECK-NEXT:    andnps {{.*}}(%rip), %xmm3
4455; CHECK-NEXT:    andnps %xmm0, %xmm2
4456; CHECK-NEXT:    subps %xmm2, %xmm1
4457; CHECK-NEXT:    cvttps2dq %xmm1, %xmm0
4458; CHECK-NEXT:    xorps %xmm3, %xmm0
4459; CHECK-NEXT:    retq
4460;
4461; AVX1-LABEL: constrained_vector_fptoui_v4i32_v4f32:
4462; AVX1:       # %bb.0: # %entry
4463; AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
4464; AVX1-NEXT:    vmovaps {{.*#+}} xmm1 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
4465; AVX1-NEXT:    vcmpltps %xmm0, %xmm1, %xmm2
4466; AVX1-NEXT:    vxorps %xmm3, %xmm3, %xmm3
4467; AVX1-NEXT:    vmovaps {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
4468; AVX1-NEXT:    vblendvps %xmm2, %xmm3, %xmm4, %xmm4
4469; AVX1-NEXT:    vblendvps %xmm2, %xmm3, %xmm0, %xmm0
4470; AVX1-NEXT:    vsubps %xmm0, %xmm1, %xmm0
4471; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
4472; AVX1-NEXT:    vxorps %xmm4, %xmm0, %xmm0
4473; AVX1-NEXT:    retq
4474;
4475; AVX512-LABEL: constrained_vector_fptoui_v4i32_v4f32:
4476; AVX512:       # %bb.0: # %entry
4477; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
4478; AVX512-NEXT:    vcvttps2udq %zmm0, %zmm0
4479; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
4480; AVX512-NEXT:    vzeroupper
4481; AVX512-NEXT:    retq
4482entry:
4483  %result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(
4484                                <4 x float><float 42.0, float 43.0,
4485                                            float 44.0, float 45.0>,
4486                                metadata !"fpexcept.strict") #0
4487  ret <4 x i32> %result
4488}
4489
4490define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() #0 {
; Strict fp-to-u64 of one float. Without hardware unsigned converts the
; input is compared against 2^63 (comiss), reduced by 2^63 when too large
; for the signed range, converted with cvttss2si, and the stolen top bit
; is restored via setbe/shlq $63/xorq. AVX512 uses vcvttss2usi directly.
4491; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f32:
4492; CHECK:       # %bb.0: # %entry
4493; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
4494; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
4495; CHECK-NEXT:    comiss %xmm0, %xmm2
4496; CHECK-NEXT:    xorps %xmm1, %xmm1
4497; CHECK-NEXT:    ja .LBB115_2
4498; CHECK-NEXT:  # %bb.1: # %entry
4499; CHECK-NEXT:    movaps %xmm2, %xmm1
4500; CHECK-NEXT:  .LBB115_2: # %entry
4501; CHECK-NEXT:    subss %xmm1, %xmm0
4502; CHECK-NEXT:    cvttss2si %xmm0, %rcx
4503; CHECK-NEXT:    setbe %al
4504; CHECK-NEXT:    movzbl %al, %eax
4505; CHECK-NEXT:    shlq $63, %rax
4506; CHECK-NEXT:    xorq %rcx, %rax
4507; CHECK-NEXT:    retq
4508;
4509; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f32:
4510; AVX1:       # %bb.0: # %entry
4511; AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
4512; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
4513; AVX1-NEXT:    vcomiss %xmm0, %xmm1
4514; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
4515; AVX1-NEXT:    ja .LBB115_2
4516; AVX1-NEXT:  # %bb.1: # %entry
4517; AVX1-NEXT:    vmovaps %xmm1, %xmm2
4518; AVX1-NEXT:  .LBB115_2: # %entry
4519; AVX1-NEXT:    vsubss %xmm2, %xmm0, %xmm0
4520; AVX1-NEXT:    vcvttss2si %xmm0, %rcx
4521; AVX1-NEXT:    setbe %al
4522; AVX1-NEXT:    movzbl %al, %eax
4523; AVX1-NEXT:    shlq $63, %rax
4524; AVX1-NEXT:    xorq %rcx, %rax
4525; AVX1-NEXT:    retq
4526;
4527; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f32:
4528; AVX512:       # %bb.0: # %entry
4529; AVX512-NEXT:    vcvttss2usi {{.*}}(%rip), %rax
4530; AVX512-NEXT:    retq
4531entry:
4532  %result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f32(
4533                               <1 x float><float 42.0>,
4534                               metadata !"fpexcept.strict") #0
4535  ret <1 x i64> %result
4536}
4537
4538define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 {
; Strict fp-to-u64 of <2 x float>: each lane uses the comiss-against-2^63
; compare/subtract/signed-convert/top-bit-restore sequence, then the two
; qwords are repacked. AVX512F scalarizes with vcvttss2usi; AVX512DQ
; converts in one packed vcvttps2uqq.
4539; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f32:
4540; CHECK:       # %bb.0: # %entry
4541; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
4542; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
4543; CHECK-NEXT:    comiss %xmm2, %xmm1
4544; CHECK-NEXT:    xorps %xmm0, %xmm0
4545; CHECK-NEXT:    xorps %xmm3, %xmm3
4546; CHECK-NEXT:    ja .LBB116_2
4547; CHECK-NEXT:  # %bb.1: # %entry
4548; CHECK-NEXT:    movaps %xmm1, %xmm3
4549; CHECK-NEXT:  .LBB116_2: # %entry
4550; CHECK-NEXT:    subss %xmm3, %xmm2
4551; CHECK-NEXT:    cvttss2si %xmm2, %rax
4552; CHECK-NEXT:    setbe %cl
4553; CHECK-NEXT:    movzbl %cl, %ecx
4554; CHECK-NEXT:    shlq $63, %rcx
4555; CHECK-NEXT:    xorq %rax, %rcx
4556; CHECK-NEXT:    movq %rcx, %xmm2
4557; CHECK-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
4558; CHECK-NEXT:    comiss %xmm3, %xmm1
4559; CHECK-NEXT:    ja .LBB116_4
4560; CHECK-NEXT:  # %bb.3: # %entry
4561; CHECK-NEXT:    movaps %xmm1, %xmm0
4562; CHECK-NEXT:  .LBB116_4: # %entry
4563; CHECK-NEXT:    subss %xmm0, %xmm3
4564; CHECK-NEXT:    cvttss2si %xmm3, %rax
4565; CHECK-NEXT:    setbe %cl
4566; CHECK-NEXT:    movzbl %cl, %ecx
4567; CHECK-NEXT:    shlq $63, %rcx
4568; CHECK-NEXT:    xorq %rax, %rcx
4569; CHECK-NEXT:    movq %rcx, %xmm0
4570; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
4571; CHECK-NEXT:    retq
4572;
4573; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f32:
4574; AVX1:       # %bb.0: # %entry
4575; AVX1-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
4576; AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
4577; AVX1-NEXT:    vcomiss %xmm2, %xmm0
4578; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
4579; AVX1-NEXT:    vxorps %xmm3, %xmm3, %xmm3
4580; AVX1-NEXT:    ja .LBB116_2
4581; AVX1-NEXT:  # %bb.1: # %entry
4582; AVX1-NEXT:    vmovaps %xmm0, %xmm3
4583; AVX1-NEXT:  .LBB116_2: # %entry
4584; AVX1-NEXT:    vsubss %xmm3, %xmm2, %xmm2
4585; AVX1-NEXT:    vcvttss2si %xmm2, %rax
4586; AVX1-NEXT:    setbe %cl
4587; AVX1-NEXT:    movzbl %cl, %ecx
4588; AVX1-NEXT:    shlq $63, %rcx
4589; AVX1-NEXT:    xorq %rax, %rcx
4590; AVX1-NEXT:    vmovq %rcx, %xmm2
4591; AVX1-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
4592; AVX1-NEXT:    vcomiss %xmm3, %xmm0
4593; AVX1-NEXT:    ja .LBB116_4
4594; AVX1-NEXT:  # %bb.3: # %entry
4595; AVX1-NEXT:    vmovaps %xmm0, %xmm1
4596; AVX1-NEXT:  .LBB116_4: # %entry
4597; AVX1-NEXT:    vsubss %xmm1, %xmm3, %xmm0
4598; AVX1-NEXT:    vcvttss2si %xmm0, %rax
4599; AVX1-NEXT:    setbe %cl
4600; AVX1-NEXT:    movzbl %cl, %ecx
4601; AVX1-NEXT:    shlq $63, %rcx
4602; AVX1-NEXT:    xorq %rax, %rcx
4603; AVX1-NEXT:    vmovq %rcx, %xmm0
4604; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
4605; AVX1-NEXT:    retq
4606;
4607; AVX512F-LABEL: constrained_vector_fptoui_v2i64_v2f32:
4608; AVX512F:       # %bb.0: # %entry
4609; AVX512F-NEXT:    vcvttss2usi {{.*}}(%rip), %rax
4610; AVX512F-NEXT:    vmovq %rax, %xmm0
4611; AVX512F-NEXT:    vcvttss2usi {{.*}}(%rip), %rax
4612; AVX512F-NEXT:    vmovq %rax, %xmm1
4613; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
4614; AVX512F-NEXT:    retq
4615;
4616; AVX512DQ-LABEL: constrained_vector_fptoui_v2i64_v2f32:
4617; AVX512DQ:       # %bb.0: # %entry
4618; AVX512DQ-NEXT:    vcvttps2uqq {{.*}}(%rip), %zmm0
4619; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
4620; AVX512DQ-NEXT:    vzeroupper
4621; AVX512DQ-NEXT:    retq
4622entry:
4623  %result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(
4624                                <2 x float><float 42.0, float 43.0>,
4625                                metadata !"fpexcept.strict") #0
4626  ret <2 x i64> %result
4627}
4628
4629define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
; Strict fp-to-u64 of the odd-width <3 x float>: three copies of the
; comiss/subtract-2^63/signed-convert/top-bit-restore sequence. The SSE
; run returns in rax/rdx/rcx (the <3 x i64> return convention here);
; AVX builds a ymm. AVX512 scalarizes with vcvttss2usi.
4630; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f32:
4631; CHECK:       # %bb.0: # %entry
4632; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
4633; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
4634; CHECK-NEXT:    comiss %xmm2, %xmm1
4635; CHECK-NEXT:    xorps %xmm0, %xmm0
4636; CHECK-NEXT:    xorps %xmm3, %xmm3
4637; CHECK-NEXT:    ja .LBB117_2
4638; CHECK-NEXT:  # %bb.1: # %entry
4639; CHECK-NEXT:    movaps %xmm1, %xmm3
4640; CHECK-NEXT:  .LBB117_2: # %entry
4641; CHECK-NEXT:    subss %xmm3, %xmm2
4642; CHECK-NEXT:    cvttss2si %xmm2, %rcx
4643; CHECK-NEXT:    setbe %al
4644; CHECK-NEXT:    movzbl %al, %eax
4645; CHECK-NEXT:    shlq $63, %rax
4646; CHECK-NEXT:    xorq %rcx, %rax
4647; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
4648; CHECK-NEXT:    comiss %xmm2, %xmm1
4649; CHECK-NEXT:    xorps %xmm3, %xmm3
4650; CHECK-NEXT:    ja .LBB117_4
4651; CHECK-NEXT:  # %bb.3: # %entry
4652; CHECK-NEXT:    movaps %xmm1, %xmm3
4653; CHECK-NEXT:  .LBB117_4: # %entry
4654; CHECK-NEXT:    subss %xmm3, %xmm2
4655; CHECK-NEXT:    cvttss2si %xmm2, %rcx
4656; CHECK-NEXT:    setbe %dl
4657; CHECK-NEXT:    movzbl %dl, %edx
4658; CHECK-NEXT:    shlq $63, %rdx
4659; CHECK-NEXT:    xorq %rcx, %rdx
4660; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
4661; CHECK-NEXT:    comiss %xmm2, %xmm1
4662; CHECK-NEXT:    ja .LBB117_6
4663; CHECK-NEXT:  # %bb.5: # %entry
4664; CHECK-NEXT:    movaps %xmm1, %xmm0
4665; CHECK-NEXT:  .LBB117_6: # %entry
4666; CHECK-NEXT:    subss %xmm0, %xmm2
4667; CHECK-NEXT:    cvttss2si %xmm2, %rsi
4668; CHECK-NEXT:    setbe %cl
4669; CHECK-NEXT:    movzbl %cl, %ecx
4670; CHECK-NEXT:    shlq $63, %rcx
4671; CHECK-NEXT:    xorq %rsi, %rcx
4672; CHECK-NEXT:    retq
4673;
4674; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f32:
4675; AVX1:       # %bb.0: # %entry
4676; AVX1-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
4677; AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
4678; AVX1-NEXT:    vcomiss %xmm2, %xmm0
4679; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
4680; AVX1-NEXT:    vxorps %xmm3, %xmm3, %xmm3
4681; AVX1-NEXT:    ja .LBB117_2
4682; AVX1-NEXT:  # %bb.1: # %entry
4683; AVX1-NEXT:    vmovaps %xmm0, %xmm3
4684; AVX1-NEXT:  .LBB117_2: # %entry
4685; AVX1-NEXT:    vsubss %xmm3, %xmm2, %xmm2
4686; AVX1-NEXT:    vcvttss2si %xmm2, %rax
4687; AVX1-NEXT:    setbe %cl
4688; AVX1-NEXT:    movzbl %cl, %ecx
4689; AVX1-NEXT:    shlq $63, %rcx
4690; AVX1-NEXT:    xorq %rax, %rcx
4691; AVX1-NEXT:    vmovq %rcx, %xmm2
4692; AVX1-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
4693; AVX1-NEXT:    vcomiss %xmm3, %xmm0
4694; AVX1-NEXT:    vxorps %xmm4, %xmm4, %xmm4
4695; AVX1-NEXT:    ja .LBB117_4
4696; AVX1-NEXT:  # %bb.3: # %entry
4697; AVX1-NEXT:    vmovaps %xmm0, %xmm4
4698; AVX1-NEXT:  .LBB117_4: # %entry
4699; AVX1-NEXT:    vsubss %xmm4, %xmm3, %xmm3
4700; AVX1-NEXT:    vcvttss2si %xmm3, %rax
4701; AVX1-NEXT:    setbe %cl
4702; AVX1-NEXT:    movzbl %cl, %ecx
4703; AVX1-NEXT:    shlq $63, %rcx
4704; AVX1-NEXT:    xorq %rax, %rcx
4705; AVX1-NEXT:    vmovq %rcx, %xmm3
4706; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
4707; AVX1-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
4708; AVX1-NEXT:    vcomiss %xmm3, %xmm0
4709; AVX1-NEXT:    ja .LBB117_6
4710; AVX1-NEXT:  # %bb.5: # %entry
4711; AVX1-NEXT:    vmovaps %xmm0, %xmm1
4712; AVX1-NEXT:  .LBB117_6: # %entry
4713; AVX1-NEXT:    vsubss %xmm1, %xmm3, %xmm0
4714; AVX1-NEXT:    vcvttss2si %xmm0, %rax
4715; AVX1-NEXT:    setbe %cl
4716; AVX1-NEXT:    movzbl %cl, %ecx
4717; AVX1-NEXT:    shlq $63, %rcx
4718; AVX1-NEXT:    xorq %rax, %rcx
4719; AVX1-NEXT:    vmovq %rcx, %xmm0
4720; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
4721; AVX1-NEXT:    retq
4722;
4723; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f32:
4724; AVX512:       # %bb.0: # %entry
4725; AVX512-NEXT:    vcvttss2usi {{.*}}(%rip), %rax
4726; AVX512-NEXT:    vmovq %rax, %xmm0
4727; AVX512-NEXT:    vcvttss2usi {{.*}}(%rip), %rax
4728; AVX512-NEXT:    vmovq %rax, %xmm1
4729; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
4730; AVX512-NEXT:    vcvttss2usi {{.*}}(%rip), %rax
4731; AVX512-NEXT:    vmovq %rax, %xmm1
4732; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
4733; AVX512-NEXT:    retq
4734entry:
4735  %result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f32(
4736                                <3 x float><float 42.0, float 43.0,
4737                                            float 44.0>,
4738                                metadata !"fpexcept.strict") #0
4739  ret <3 x i64> %result
4740}
4741
4742define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
; Strict fp-to-u64 of <4 x float>: four copies of the comiss-based
; 2^63 bias sequence on SSE/AVX1, packed pairwise into two xmm halves
; (and a ymm on AVX1). AVX512F scalarizes via vcvttss2usi; AVX512DQ
; uses one packed vcvttps2uqq.
4743; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f32:
4744; CHECK:       # %bb.0: # %entry
4745; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
4746; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
4747; CHECK-NEXT:    comiss %xmm0, %xmm2
4748; CHECK-NEXT:    xorps %xmm1, %xmm1
4749; CHECK-NEXT:    xorps %xmm3, %xmm3
4750; CHECK-NEXT:    ja .LBB118_2
4751; CHECK-NEXT:  # %bb.1: # %entry
4752; CHECK-NEXT:    movaps %xmm2, %xmm3
4753; CHECK-NEXT:  .LBB118_2: # %entry
4754; CHECK-NEXT:    subss %xmm3, %xmm0
4755; CHECK-NEXT:    cvttss2si %xmm0, %rcx
4756; CHECK-NEXT:    setbe %al
4757; CHECK-NEXT:    movzbl %al, %eax
4758; CHECK-NEXT:    shlq $63, %rax
4759; CHECK-NEXT:    xorq %rcx, %rax
4760; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
4761; CHECK-NEXT:    comiss %xmm0, %xmm2
4762; CHECK-NEXT:    xorps %xmm4, %xmm4
4763; CHECK-NEXT:    ja .LBB118_4
4764; CHECK-NEXT:  # %bb.3: # %entry
4765; CHECK-NEXT:    movaps %xmm2, %xmm4
4766; CHECK-NEXT:  .LBB118_4: # %entry
4767; CHECK-NEXT:    movq %rax, %xmm3
4768; CHECK-NEXT:    subss %xmm4, %xmm0
4769; CHECK-NEXT:    cvttss2si %xmm0, %rax
4770; CHECK-NEXT:    setbe %cl
4771; CHECK-NEXT:    movzbl %cl, %ecx
4772; CHECK-NEXT:    shlq $63, %rcx
4773; CHECK-NEXT:    xorq %rax, %rcx
4774; CHECK-NEXT:    movq %rcx, %xmm0
4775; CHECK-NEXT:    movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
4776; CHECK-NEXT:    comiss %xmm4, %xmm2
4777; CHECK-NEXT:    xorps %xmm5, %xmm5
4778; CHECK-NEXT:    ja .LBB118_6
4779; CHECK-NEXT:  # %bb.5: # %entry
4780; CHECK-NEXT:    movaps %xmm2, %xmm5
4781; CHECK-NEXT:  .LBB118_6: # %entry
4782; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
4783; CHECK-NEXT:    subss %xmm5, %xmm4
4784; CHECK-NEXT:    cvttss2si %xmm4, %rax
4785; CHECK-NEXT:    setbe %cl
4786; CHECK-NEXT:    movzbl %cl, %ecx
4787; CHECK-NEXT:    shlq $63, %rcx
4788; CHECK-NEXT:    xorq %rax, %rcx
4789; CHECK-NEXT:    movq %rcx, %xmm3
4790; CHECK-NEXT:    movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
4791; CHECK-NEXT:    comiss %xmm4, %xmm2
4792; CHECK-NEXT:    ja .LBB118_8
4793; CHECK-NEXT:  # %bb.7: # %entry
4794; CHECK-NEXT:    movaps %xmm2, %xmm1
4795; CHECK-NEXT:  .LBB118_8: # %entry
4796; CHECK-NEXT:    subss %xmm1, %xmm4
4797; CHECK-NEXT:    cvttss2si %xmm4, %rax
4798; CHECK-NEXT:    setbe %cl
4799; CHECK-NEXT:    movzbl %cl, %ecx
4800; CHECK-NEXT:    shlq $63, %rcx
4801; CHECK-NEXT:    xorq %rax, %rcx
4802; CHECK-NEXT:    movq %rcx, %xmm1
4803; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
4804; CHECK-NEXT:    retq
4805;
4806; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f32:
4807; AVX1:       # %bb.0: # %entry
4808; AVX1-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
4809; AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
4810; AVX1-NEXT:    vcomiss %xmm2, %xmm0
4811; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
4812; AVX1-NEXT:    vxorps %xmm3, %xmm3, %xmm3
4813; AVX1-NEXT:    ja .LBB118_2
4814; AVX1-NEXT:  # %bb.1: # %entry
4815; AVX1-NEXT:    vmovaps %xmm0, %xmm3
4816; AVX1-NEXT:  .LBB118_2: # %entry
4817; AVX1-NEXT:    vsubss %xmm3, %xmm2, %xmm2
4818; AVX1-NEXT:    vcvttss2si %xmm2, %rcx
4819; AVX1-NEXT:    setbe %al
4820; AVX1-NEXT:    movzbl %al, %eax
4821; AVX1-NEXT:    shlq $63, %rax
4822; AVX1-NEXT:    xorq %rcx, %rax
4823; AVX1-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
4824; AVX1-NEXT:    vcomiss %xmm3, %xmm0
4825; AVX1-NEXT:    vxorps %xmm4, %xmm4, %xmm4
4826; AVX1-NEXT:    ja .LBB118_4
4827; AVX1-NEXT:  # %bb.3: # %entry
4828; AVX1-NEXT:    vmovaps %xmm0, %xmm4
4829; AVX1-NEXT:  .LBB118_4: # %entry
4830; AVX1-NEXT:    vmovq %rax, %xmm2
4831; AVX1-NEXT:    vsubss %xmm4, %xmm3, %xmm3
4832; AVX1-NEXT:    vcvttss2si %xmm3, %rax
4833; AVX1-NEXT:    setbe %cl
4834; AVX1-NEXT:    movzbl %cl, %ecx
4835; AVX1-NEXT:    shlq $63, %rcx
4836; AVX1-NEXT:    xorq %rax, %rcx
4837; AVX1-NEXT:    vmovq %rcx, %xmm3
4838; AVX1-NEXT:    vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
4839; AVX1-NEXT:    vcomiss %xmm4, %xmm0
4840; AVX1-NEXT:    vxorps %xmm5, %xmm5, %xmm5
4841; AVX1-NEXT:    ja .LBB118_6
4842; AVX1-NEXT:  # %bb.5: # %entry
4843; AVX1-NEXT:    vmovaps %xmm0, %xmm5
4844; AVX1-NEXT:  .LBB118_6: # %entry
4845; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
4846; AVX1-NEXT:    vsubss %xmm5, %xmm4, %xmm3
4847; AVX1-NEXT:    vcvttss2si %xmm3, %rax
4848; AVX1-NEXT:    setbe %cl
4849; AVX1-NEXT:    movzbl %cl, %ecx
4850; AVX1-NEXT:    shlq $63, %rcx
4851; AVX1-NEXT:    xorq %rax, %rcx
4852; AVX1-NEXT:    vmovq %rcx, %xmm3
4853; AVX1-NEXT:    vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
4854; AVX1-NEXT:    vcomiss %xmm4, %xmm0
4855; AVX1-NEXT:    ja .LBB118_8
4856; AVX1-NEXT:  # %bb.7: # %entry
4857; AVX1-NEXT:    vmovaps %xmm0, %xmm1
4858; AVX1-NEXT:  .LBB118_8: # %entry
4859; AVX1-NEXT:    vsubss %xmm1, %xmm4, %xmm0
4860; AVX1-NEXT:    vcvttss2si %xmm0, %rax
4861; AVX1-NEXT:    setbe %cl
4862; AVX1-NEXT:    movzbl %cl, %ecx
4863; AVX1-NEXT:    shlq $63, %rcx
4864; AVX1-NEXT:    xorq %rax, %rcx
4865; AVX1-NEXT:    vmovq %rcx, %xmm0
4866; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
4867; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
4868; AVX1-NEXT:    retq
4869;
4870; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f32:
4871; AVX512F:       # %bb.0: # %entry
4872; AVX512F-NEXT:    vcvttss2usi {{.*}}(%rip), %rax
4873; AVX512F-NEXT:    vmovq %rax, %xmm0
4874; AVX512F-NEXT:    vcvttss2usi {{.*}}(%rip), %rax
4875; AVX512F-NEXT:    vmovq %rax, %xmm1
4876; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
4877; AVX512F-NEXT:    vcvttss2usi {{.*}}(%rip), %rax
4878; AVX512F-NEXT:    vmovq %rax, %xmm1
4879; AVX512F-NEXT:    vcvttss2usi {{.*}}(%rip), %rax
4880; AVX512F-NEXT:    vmovq %rax, %xmm2
4881; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
4882; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
4883; AVX512F-NEXT:    retq
4884;
4885; AVX512DQ-LABEL: constrained_vector_fptoui_v4i64_v4f32:
4886; AVX512DQ:       # %bb.0: # %entry
4887; AVX512DQ-NEXT:    vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
4888; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
4889; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
4890; AVX512DQ-NEXT:    retq
4891entry:
4892  %result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(
4893                                <4 x float><float 42.0, float 43.0,
4894                                            float 44.0, float 45.0>,
4895                                metadata !"fpexcept.strict") #0
4896  ret <4 x i64> %result
4897}
4898
4899define <1 x i32> @constrained_vector_fptoui_v1i32_v1f64() #0 {
; Strict fp-to-unsigned-i32 of one double: 64-bit signed cvttsd2si
; truncated to %eax pre-AVX512; native unsigned vcvttsd2usi on AVX512.
4900; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f64:
4901; CHECK:       # %bb.0: # %entry
4902; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
4903; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
4904; CHECK-NEXT:    retq
4905;
4906; AVX1-LABEL: constrained_vector_fptoui_v1i32_v1f64:
4907; AVX1:       # %bb.0: # %entry
4908; AVX1-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
4909; AVX1-NEXT:    # kill: def $eax killed $eax killed $rax
4910; AVX1-NEXT:    retq
4911;
4912; AVX512-LABEL: constrained_vector_fptoui_v1i32_v1f64:
4913; AVX512:       # %bb.0: # %entry
4914; AVX512-NEXT:    vcvttsd2usi {{.*}}(%rip), %eax
4915; AVX512-NEXT:    retq
4916entry:
4917  %result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(
4918                               <1 x double><double 42.1>,
4919                               metadata !"fpexcept.strict") #0
4920  ret <1 x i32> %result
4921}
4922
4923define <2 x i32> @constrained_vector_fptoui_v2i32_v2f64() #0 {
; Strict fp-to-unsigned <2 x i32> from doubles: scalarized signed
; converts pre-AVX512; AVX512 widens to zmm and uses vcvttpd2udq.
4924; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f64:
4925; CHECK:       # %bb.0: # %entry
4926; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
4927; CHECK-NEXT:    movd %eax, %xmm1
4928; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
4929; CHECK-NEXT:    movd %eax, %xmm0
4930; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
4931; CHECK-NEXT:    retq
4932;
4933; AVX1-LABEL: constrained_vector_fptoui_v2i32_v2f64:
4934; AVX1:       # %bb.0: # %entry
4935; AVX1-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
4936; AVX1-NEXT:    vcvttsd2si {{.*}}(%rip), %rcx
4937; AVX1-NEXT:    vmovd %ecx, %xmm0
4938; AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
4939; AVX1-NEXT:    retq
4940;
4941; AVX512-LABEL: constrained_vector_fptoui_v2i32_v2f64:
4942; AVX512:       # %bb.0: # %entry
4943; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,0.0E+0,0.0E+0]
4944; AVX512-NEXT:    vcvttpd2udq %zmm0, %ymm0
4945; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
4946; AVX512-NEXT:    vzeroupper
4947; AVX512-NEXT:    retq
4948entry:
4949  %result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(
4950                                <2 x double><double 42.1, double 42.2>,
4951                                metadata !"fpexcept.strict") #0
4952  ret <2 x i32> %result
4953}
4954
4955define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64() #0 {
; Strict fp-to-unsigned of odd-width <3 x double>: scalarized converts
; on every run line (signed cvttsd2si pre-AVX512, vcvttsd2usi after),
; with per-element vector insertion.
4956; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f64:
4957; CHECK:       # %bb.0: # %entry
4958; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
4959; CHECK-NEXT:    movd %eax, %xmm1
4960; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
4961; CHECK-NEXT:    movd %eax, %xmm0
4962; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
4963; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
4964; CHECK-NEXT:    movd %eax, %xmm1
4965; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4966; CHECK-NEXT:    retq
4967;
4968; AVX1-LABEL: constrained_vector_fptoui_v3i32_v3f64:
4969; AVX1:       # %bb.0: # %entry
4970; AVX1-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
4971; AVX1-NEXT:    vcvttsd2si {{.*}}(%rip), %rcx
4972; AVX1-NEXT:    vmovd %ecx, %xmm0
4973; AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
4974; AVX1-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
4975; AVX1-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
4976; AVX1-NEXT:    retq
4977;
4978; AVX512-LABEL: constrained_vector_fptoui_v3i32_v3f64:
4979; AVX512:       # %bb.0: # %entry
4980; AVX512-NEXT:    vcvttsd2usi {{.*}}(%rip), %eax
4981; AVX512-NEXT:    vmovd %eax, %xmm0
4982; AVX512-NEXT:    vcvttsd2usi {{.*}}(%rip), %eax
4983; AVX512-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
4984; AVX512-NEXT:    vcvttsd2usi {{.*}}(%rip), %eax
4985; AVX512-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
4986; AVX512-NEXT:    retq
4987entry:
4988  %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64(
4989                                <3 x double><double 42.1, double 42.2,
4990                                             double 42.3>,
4991                                metadata !"fpexcept.strict") #0
4992  ret <3 x i32> %result
4993}
4994
4995define <4 x i32> @constrained_vector_fptoui_v4i32_v4f64() #0 {
; Strict fp-to-unsigned <4 x i32> from doubles. SSE scalarizes; AVX1
; applies the 2^31 bias trick vector-wide (compare against
; 2.147483648E+9, subtract from large lanes, signed vcvttpd2dq, then
; XOR the 0x80000000 bit back). AVX512 uses vcvttpd2udq.
4996; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f64:
4997; CHECK:       # %bb.0: # %entry
4998; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
4999; CHECK-NEXT:    movd %eax, %xmm0
5000; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
5001; CHECK-NEXT:    movd %eax, %xmm1
5002; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
5003; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
5004; CHECK-NEXT:    movd %eax, %xmm2
5005; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
5006; CHECK-NEXT:    movd %eax, %xmm0
5007; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
5008; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
5009; CHECK-NEXT:    retq
5010;
5011; AVX1-LABEL: constrained_vector_fptoui_v4i32_v4f64:
5012; AVX1:       # %bb.0: # %entry
5013; AVX1-NEXT:    vmovapd {{.*#+}} ymm0 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
5014; AVX1-NEXT:    vmovapd {{.*#+}} ymm1 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1]
5015; AVX1-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm2
5016; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
5017; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm2[0,2],xmm3[0,2]
5018; AVX1-NEXT:    vxorps %xmm4, %xmm4, %xmm4
5019; AVX1-NEXT:    vmovaps {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
5020; AVX1-NEXT:    vblendvps %xmm3, %xmm4, %xmm5, %xmm3
5021; AVX1-NEXT:    vxorps %xmm4, %xmm4, %xmm4
5022; AVX1-NEXT:    vblendvpd %ymm2, %ymm4, %ymm0, %ymm0
5023; AVX1-NEXT:    vsubpd %ymm0, %ymm1, %ymm0
5024; AVX1-NEXT:    vcvttpd2dq %ymm0, %xmm0
5025; AVX1-NEXT:    vxorpd %xmm3, %xmm0, %xmm0
5026; AVX1-NEXT:    vzeroupper
5027; AVX1-NEXT:    retq
5028;
5029; AVX512-LABEL: constrained_vector_fptoui_v4i32_v4f64:
5030; AVX512:       # %bb.0: # %entry
5031; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1]
5032; AVX512-NEXT:    vcvttpd2udq %zmm0, %ymm0
5033; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
5034; AVX512-NEXT:    vzeroupper
5035; AVX512-NEXT:    retq
5036entry:
5037  %result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(
5038                                <4 x double><double 42.1, double 42.2,
5039                                             double 42.3, double 42.4>,
5040                                metadata !"fpexcept.strict") #0
5041  ret <4 x i32> %result
5042}
5043
; Strict fptoui <1 x double> -> <1 x i64> (scalarized).
; SSE/AVX1: compare against 2^63, conditionally subtract it, do a signed
; cvttsd2si, then xor the sign bit back in via setbe/shlq. AVX512 has a
; native unsigned convert (vcvttsd2usi).
define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT:    comisd %xmm0, %xmm2
; CHECK-NEXT:    xorpd %xmm1, %xmm1
; CHECK-NEXT:    ja .LBB123_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movapd %xmm2, %xmm1
; CHECK-NEXT:  .LBB123_2: # %entry
; CHECK-NEXT:    subsd %xmm1, %xmm0
; CHECK-NEXT:    cvttsd2si %xmm0, %rcx
; CHECK-NEXT:    setbe %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    shlq $63, %rax
; CHECK-NEXT:    xorq %rcx, %rax
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT:    vcomisd %xmm0, %xmm1
; AVX1-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    ja .LBB123_2
; AVX1-NEXT:  # %bb.1: # %entry
; AVX1-NEXT:    vmovapd %xmm1, %xmm2
; AVX1-NEXT:  .LBB123_2: # %entry
; AVX1-NEXT:    vsubsd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX1-NEXT:    setbe %al
; AVX1-NEXT:    movzbl %al, %eax
; AVX1-NEXT:    shlq $63, %rax
; AVX1-NEXT:    xorq %rcx, %rax
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvttsd2usi {{.*}}(%rip), %rax
; AVX512-NEXT:    retq
entry:
  %result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(
                               <1 x double><double 42.1>,
                               metadata !"fpexcept.strict") #0
  ret <1 x i64> %result
}
5091
; Strict fptoui <2 x double> -> <2 x i64>.
; SSE/AVX1 scalarize each lane with the compare-2^63/subtract/cvttsd2si/xor
; sequence and repack with punpcklqdq. AVX512F converts lanes with
; vcvttsd2usi; AVX512DQ uses the packed vcvttpd2uqq.
define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    comisd %xmm2, %xmm1
; CHECK-NEXT:    xorpd %xmm0, %xmm0
; CHECK-NEXT:    xorpd %xmm3, %xmm3
; CHECK-NEXT:    ja .LBB124_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movapd %xmm1, %xmm3
; CHECK-NEXT:  .LBB124_2: # %entry
; CHECK-NEXT:    subsd %xmm3, %xmm2
; CHECK-NEXT:    cvttsd2si %xmm2, %rax
; CHECK-NEXT:    setbe %cl
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    shlq $63, %rcx
; CHECK-NEXT:    xorq %rax, %rcx
; CHECK-NEXT:    movq %rcx, %xmm2
; CHECK-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
; CHECK-NEXT:    comisd %xmm3, %xmm1
; CHECK-NEXT:    ja .LBB124_4
; CHECK-NEXT:  # %bb.3: # %entry
; CHECK-NEXT:    movapd %xmm1, %xmm0
; CHECK-NEXT:  .LBB124_4: # %entry
; CHECK-NEXT:    subsd %xmm0, %xmm3
; CHECK-NEXT:    cvttsd2si %xmm3, %rax
; CHECK-NEXT:    setbe %cl
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    shlq $63, %rcx
; CHECK-NEXT:    xorq %rax, %rcx
; CHECK-NEXT:    movq %rcx, %xmm0
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vcomisd %xmm2, %xmm0
; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    ja .LBB124_2
; AVX1-NEXT:  # %bb.1: # %entry
; AVX1-NEXT:    vmovapd %xmm0, %xmm3
; AVX1-NEXT:  .LBB124_2: # %entry
; AVX1-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vcvttsd2si %xmm2, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm2
; AVX1-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
; AVX1-NEXT:    vcomisd %xmm3, %xmm0
; AVX1-NEXT:    ja .LBB124_4
; AVX1-NEXT:  # %bb.3: # %entry
; AVX1-NEXT:    vmovapd %xmm0, %xmm1
; AVX1-NEXT:  .LBB124_4: # %entry
; AVX1-NEXT:    vsubsd %xmm1, %xmm3, %xmm0
; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_fptoui_v2i64_v2f64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vcvttsd2usi {{.*}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    vcvttsd2usi {{.*}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_fptoui_v2i64_v2f64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps {{.*#+}} xmm0 = [4.2100000000000001E+1,4.2200000000000003E+1]
; AVX512DQ-NEXT:    vcvttpd2uqq %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
entry:
  %result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(
                                <2 x double><double 42.1, double 42.2>,
                                metadata !"fpexcept.strict") #0
  ret <2 x i64> %result
}
5183
; Strict fptoui <3 x double> -> <3 x i64> (odd-sized vector).
; SSE returns the three scalarized conversions in rax/rdx/rcx; AVX builds a
; ymm from two xmm halves via vinsertf128/vinserti128. AVX512 uses
; vcvttsd2usi per lane.
define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    comisd %xmm2, %xmm1
; CHECK-NEXT:    xorpd %xmm0, %xmm0
; CHECK-NEXT:    xorpd %xmm3, %xmm3
; CHECK-NEXT:    ja .LBB125_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movapd %xmm1, %xmm3
; CHECK-NEXT:  .LBB125_2: # %entry
; CHECK-NEXT:    subsd %xmm3, %xmm2
; CHECK-NEXT:    cvttsd2si %xmm2, %rcx
; CHECK-NEXT:    setbe %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    shlq $63, %rax
; CHECK-NEXT:    xorq %rcx, %rax
; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT:    comisd %xmm2, %xmm1
; CHECK-NEXT:    xorpd %xmm3, %xmm3
; CHECK-NEXT:    ja .LBB125_4
; CHECK-NEXT:  # %bb.3: # %entry
; CHECK-NEXT:    movapd %xmm1, %xmm3
; CHECK-NEXT:  .LBB125_4: # %entry
; CHECK-NEXT:    subsd %xmm3, %xmm2
; CHECK-NEXT:    cvttsd2si %xmm2, %rcx
; CHECK-NEXT:    setbe %dl
; CHECK-NEXT:    movzbl %dl, %edx
; CHECK-NEXT:    shlq $63, %rdx
; CHECK-NEXT:    xorq %rcx, %rdx
; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT:    comisd %xmm2, %xmm1
; CHECK-NEXT:    ja .LBB125_6
; CHECK-NEXT:  # %bb.5: # %entry
; CHECK-NEXT:    movapd %xmm1, %xmm0
; CHECK-NEXT:  .LBB125_6: # %entry
; CHECK-NEXT:    subsd %xmm0, %xmm2
; CHECK-NEXT:    cvttsd2si %xmm2, %rsi
; CHECK-NEXT:    setbe %cl
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    shlq $63, %rcx
; CHECK-NEXT:    xorq %rsi, %rcx
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vcomisd %xmm2, %xmm0
; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    ja .LBB125_2
; AVX1-NEXT:  # %bb.1: # %entry
; AVX1-NEXT:    vmovapd %xmm0, %xmm3
; AVX1-NEXT:  .LBB125_2: # %entry
; AVX1-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vcvttsd2si %xmm2, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm2
; AVX1-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
; AVX1-NEXT:    vcomisd %xmm3, %xmm0
; AVX1-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    ja .LBB125_4
; AVX1-NEXT:  # %bb.3: # %entry
; AVX1-NEXT:    vmovapd %xmm0, %xmm4
; AVX1-NEXT:  .LBB125_4: # %entry
; AVX1-NEXT:    vsubsd %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vcvttsd2si %xmm3, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm3
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
; AVX1-NEXT:    vcomisd %xmm3, %xmm0
; AVX1-NEXT:    ja .LBB125_6
; AVX1-NEXT:  # %bb.5: # %entry
; AVX1-NEXT:    vmovapd %xmm0, %xmm1
; AVX1-NEXT:  .LBB125_6: # %entry
; AVX1-NEXT:    vsubsd %xmm1, %xmm3, %xmm0
; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvttsd2usi {{.*}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm0
; AVX512-NEXT:    vcvttsd2usi {{.*}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512-NEXT:    vcvttsd2usi {{.*}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f64(
                                <3 x double><double 42.1, double 42.2,
                                             double 42.3>,
                                metadata !"fpexcept.strict") #0
  ret <3 x i64> %result
}
5296
; Strict fptoui <4 x double> -> <4 x i64>.
; SSE/AVX1 scalarize all four lanes with the 2^63 compare/subtract/xor
; sequence; AVX512F uses four vcvttsd2usi conversions repacked into a ymm;
; AVX512DQ uses the packed vcvttpd2uqq on a widened zmm.
define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT:    comisd %xmm0, %xmm2
; CHECK-NEXT:    xorpd %xmm1, %xmm1
; CHECK-NEXT:    xorpd %xmm3, %xmm3
; CHECK-NEXT:    ja .LBB126_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movapd %xmm2, %xmm3
; CHECK-NEXT:  .LBB126_2: # %entry
; CHECK-NEXT:    subsd %xmm3, %xmm0
; CHECK-NEXT:    cvttsd2si %xmm0, %rcx
; CHECK-NEXT:    setbe %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    shlq $63, %rax
; CHECK-NEXT:    xorq %rcx, %rax
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    comisd %xmm0, %xmm2
; CHECK-NEXT:    xorpd %xmm4, %xmm4
; CHECK-NEXT:    ja .LBB126_4
; CHECK-NEXT:  # %bb.3: # %entry
; CHECK-NEXT:    movapd %xmm2, %xmm4
; CHECK-NEXT:  .LBB126_4: # %entry
; CHECK-NEXT:    movq %rax, %xmm3
; CHECK-NEXT:    subsd %xmm4, %xmm0
; CHECK-NEXT:    cvttsd2si %xmm0, %rax
; CHECK-NEXT:    setbe %cl
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    shlq $63, %rcx
; CHECK-NEXT:    xorq %rax, %rcx
; CHECK-NEXT:    movq %rcx, %xmm0
; CHECK-NEXT:    movsd {{.*#+}} xmm4 = mem[0],zero
; CHECK-NEXT:    comisd %xmm4, %xmm2
; CHECK-NEXT:    xorpd %xmm5, %xmm5
; CHECK-NEXT:    ja .LBB126_6
; CHECK-NEXT:  # %bb.5: # %entry
; CHECK-NEXT:    movapd %xmm2, %xmm5
; CHECK-NEXT:  .LBB126_6: # %entry
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; CHECK-NEXT:    subsd %xmm5, %xmm4
; CHECK-NEXT:    cvttsd2si %xmm4, %rax
; CHECK-NEXT:    setbe %cl
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    shlq $63, %rcx
; CHECK-NEXT:    xorq %rax, %rcx
; CHECK-NEXT:    movq %rcx, %xmm3
; CHECK-NEXT:    movsd {{.*#+}} xmm4 = mem[0],zero
; CHECK-NEXT:    comisd %xmm4, %xmm2
; CHECK-NEXT:    ja .LBB126_8
; CHECK-NEXT:  # %bb.7: # %entry
; CHECK-NEXT:    movapd %xmm2, %xmm1
; CHECK-NEXT:  .LBB126_8: # %entry
; CHECK-NEXT:    subsd %xmm1, %xmm4
; CHECK-NEXT:    cvttsd2si %xmm4, %rax
; CHECK-NEXT:    setbe %cl
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    shlq $63, %rcx
; CHECK-NEXT:    xorq %rax, %rcx
; CHECK-NEXT:    movq %rcx, %xmm1
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vcomisd %xmm2, %xmm0
; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    ja .LBB126_2
; AVX1-NEXT:  # %bb.1: # %entry
; AVX1-NEXT:    vmovapd %xmm0, %xmm3
; AVX1-NEXT:  .LBB126_2: # %entry
; AVX1-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vcvttsd2si %xmm2, %rcx
; AVX1-NEXT:    setbe %al
; AVX1-NEXT:    movzbl %al, %eax
; AVX1-NEXT:    shlq $63, %rax
; AVX1-NEXT:    xorq %rcx, %rax
; AVX1-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
; AVX1-NEXT:    vcomisd %xmm3, %xmm0
; AVX1-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    ja .LBB126_4
; AVX1-NEXT:  # %bb.3: # %entry
; AVX1-NEXT:    vmovapd %xmm0, %xmm4
; AVX1-NEXT:  .LBB126_4: # %entry
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vsubsd %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vcvttsd2si %xmm3, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm3
; AVX1-NEXT:    vmovsd {{.*#+}} xmm4 = mem[0],zero
; AVX1-NEXT:    vcomisd %xmm4, %xmm0
; AVX1-NEXT:    vxorpd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    ja .LBB126_6
; AVX1-NEXT:  # %bb.5: # %entry
; AVX1-NEXT:    vmovapd %xmm0, %xmm5
; AVX1-NEXT:  .LBB126_6: # %entry
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT:    vsubsd %xmm5, %xmm4, %xmm3
; AVX1-NEXT:    vcvttsd2si %xmm3, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm3
; AVX1-NEXT:    vmovsd {{.*#+}} xmm4 = mem[0],zero
; AVX1-NEXT:    vcomisd %xmm4, %xmm0
; AVX1-NEXT:    ja .LBB126_8
; AVX1-NEXT:  # %bb.7: # %entry
; AVX1-NEXT:    vmovapd %xmm0, %xmm1
; AVX1-NEXT:  .LBB126_8: # %entry
; AVX1-NEXT:    vsubsd %xmm1, %xmm4, %xmm0
; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vcvttsd2usi {{.*}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    vcvttsd2usi {{.*}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT:    vcvttsd2usi {{.*}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vcvttsd2usi {{.*}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm2
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_fptoui_v4i64_v4f64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1]
; AVX512DQ-NEXT:    vcvttpd2uqq %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
entry:
  %result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(
                                <4 x double><double 42.1, double 42.2,
                                             double 42.3, double 42.4>,
                                metadata !"fpexcept.strict") #0
  ret <4 x i64> %result
}
5453
5454
; Strict fptrunc <1 x double> -> <1 x float>: scalarized to cvtsd2ss.
define <1 x float> @constrained_vector_fptrunc_v1f64() #0 {
; CHECK-LABEL: constrained_vector_fptrunc_v1f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptrunc_v1f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(
                                <1 x double><double 42.1>,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <1 x float> %result
}
5474
; Strict fptrunc <2 x double> -> <2 x float>: packed cvtpd2ps / vcvtpd2psx.
define <2 x float> @constrained_vector_fptrunc_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fptrunc_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptrunc_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtpd2psx {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
                                <2 x double><double 42.1, double 42.2>,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}
5492
; Strict fptrunc <3 x double> -> <3 x float>: the odd-sized vector is
; scalarized into three cvtsd2ss conversions and reassembled with shuffles
; (SSE: unpcklps/movlhps; AVX: vinsertps).
define <3 x float> @constrained_vector_fptrunc_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fptrunc_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm1
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    cvtsd2ss %xmm1, %xmm1
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptrunc_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-NEXT:    retq
entry:
  %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
                                <3 x double><double 42.1, double 42.2,
                                             double 42.3>,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <3 x float> %result
}
5525
; Strict fptrunc <4 x double> -> <4 x float>: SSE uses two cvtpd2ps halves
; merged with unpcklpd; AVX uses the 256-bit vcvtpd2psy in one shot.
define <4 x float> @constrained_vector_fptrunc_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fptrunc_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm1
; CHECK-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm0
; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptrunc_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtpd2psy {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
                                <4 x double><double 42.1, double 42.2,
                                             double 42.3, double 42.4>,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}
5546
; Strict fpext <1 x float> -> <1 x double>: scalarized to cvtss2sd.
define <1 x double> @constrained_vector_fpext_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fpext_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    cvtss2sd %xmm0, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fpext_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(
                                <1 x float><float 42.0>,
                                metadata !"fpexcept.strict") #0
  ret <1 x double> %result
}
5565
; Strict fpext <2 x float> -> <2 x double>: packed cvtps2pd.
define <2 x double> @constrained_vector_fpext_v2f32() #0 {
; CHECK-LABEL: constrained_vector_fpext_v2f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtps2pd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fpext_v2f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtps2pd {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
                                <2 x float><float 42.0, float 43.0>,
                                metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}
5582
; Strict fpext <3 x float> -> <3 x double>: scalarized to three cvtss2sd.
; SSE returns the third element via the x87 stack (fldl, with a strict-FP
; wait); AVX assembles a ymm with vmovlhps + vinsertf128.
define <3 x double> @constrained_vector_fpext_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fpext_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    cvtss2sd %xmm0, %xmm1
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    cvtss2sd %xmm0, %xmm0
; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    cvtss2sd %xmm2, %xmm2
; CHECK-NEXT:    movsd %xmm2, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fpext_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
entry:
  %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(
                                <3 x float><float 42.0, float 43.0,
                                            float 44.0>,
                                metadata !"fpexcept.strict") #0
  ret <3 x double> %result
}
5615
; Strict fpext <4 x float> -> <4 x double>: SSE splits into two xmm
; cvtps2pd conversions; AVX uses one 256-bit vcvtps2pd.
define <4 x double> @constrained_vector_fpext_v4f32() #0 {
; CHECK-LABEL: constrained_vector_fpext_v4f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtps2pd {{.*}}(%rip), %xmm1
; CHECK-NEXT:    cvtps2pd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fpext_v4f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtps2pd {{.*}}(%rip), %ymm0
; AVX-NEXT:    retq
entry:
  %result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
                                <4 x float><float 42.0, float 43.0,
                                            float 44.0, float 45.0>,
                                metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
5634
; Strict ceil <1 x float>: without SSE4.1 rounding the base target calls
; libm ceilf; AVX uses vroundss with imm 10 (round-up, suppress precision).
define <1 x float> @constrained_vector_ceil_v1f32() #0 {
; CHECK-LABEL: constrained_vector_ceil_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq ceilf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_ceil_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
                               <1 x float> <float 1.5>,
                               metadata !"fpexcept.strict") #0
  ret <1 x float> %ceil
}
5657
; Strict ceil <2 x double>: base target makes two libm ceil calls, spilling
; the first result around the second; AVX uses one packed vroundpd $10.
define <2 x double> @constrained_vector_ceil_v2f64() #0 {
; CHECK-LABEL: constrained_vector_ceil_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq ceil
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq ceil
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_ceil_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vroundpd $10, {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
                                <2 x double> <double 1.1, double 1.9>,
                                metadata !"fpexcept.strict") #0
  ret <2 x double> %ceil
}
5684
; Strict ceil <3 x float>: base target scalarizes into three libm ceilf
; calls with stack spills, then repacks; AVX uses three vroundss $10 and
; vinsertps shuffles.
define <3 x float> @constrained_vector_ceil_v3f32() #0 {
; CHECK-LABEL: constrained_vector_ceil_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq ceilf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq ceilf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq ceilf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_ceil_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $10, %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $10, %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
entry:
  %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
                              <3 x float> <float 1.5, float 2.5, float 3.5>,
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %ceil
}
5724
; Strict ceil <3 x double>: base target makes three libm ceil calls and
; returns the third element on the x87 stack (fldl + wait); AVX combines a
; vroundsd for the odd lane with a packed vroundpd and vinsertf128.
define <3 x double> @constrained_vector_ceil_v3f64() #0 {
; CHECK-LABEL: constrained_vector_ceil_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq ceil
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq ceil
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq ceil
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_ceil_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vroundpd $10, {{.*}}(%rip), %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
                          <3 x double> <double 1.1, double 1.9, double 1.5>,
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %ceil
}
5762
; Strict floor <1 x float>: base target calls libm floorf; AVX uses
; vroundss with imm 9 (round-down, suppress precision).
define <1 x float> @constrained_vector_floor_v1f32() #0 {
; CHECK-LABEL: constrained_vector_floor_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq floorf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_floor_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
                               <1 x float> <float 1.5>,
                               metadata !"fpexcept.strict") #0
  ret <1 x float> %floor
}
5785
5786
; Strict-exception floor of a <2 x double> constant. Baseline SSE scalarizes
; into two libm floor() calls recombined with unpcklpd; AVX folds the whole
; vector into one vroundpd ($9 = round toward -infinity) from memory.
define <2 x double> @constrained_vector_floor_v2f64() #0 {
; CHECK-LABEL: constrained_vector_floor_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq floor
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq floor
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_floor_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vroundpd $9, {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
                                <2 x double> <double 1.1, double 1.9>,
                                metadata !"fpexcept.strict") #0
  ret <2 x double> %floor
}
5813
; Strict-exception floor of a non-power-of-two <3 x float>. Baseline SSE
; makes three floorf() calls and rebuilds the vector with unpcklps/unpcklpd;
; AVX uses three scalar vroundss ($9) plus vinsertps lane inserts.
define <3 x float> @constrained_vector_floor_v3f32() #0 {
; CHECK-LABEL: constrained_vector_floor_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq floorf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq floorf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq floorf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_floor_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $9, %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $9, %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
entry:
  %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
                              <3 x float> <float 1.5, float 2.5, float 3.5>,
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %floor
}
5853
; Strict-exception floor of a <3 x double>. Baseline SSE issues three libm
; floor() calls (third lane returned on the x87 stack); AVX splits into a
; scalar vroundsd plus a vroundpd on the low pair, both with imm 9.
define <3 x double> @constrained_vector_floor_v3f64() #0 {
; CHECK-LABEL: constrained_vector_floor_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq floor
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq floor
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq floor
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_floor_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vroundpd $9, {{.*}}(%rip), %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
                          <3 x double> <double 1.1, double 1.9, double 1.5>,
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %floor
}
5891
; Strict-exception round (half away from zero) of <1 x float>. Unlike
; ceil/floor/trunc there is no ROUND* immediate for this mode, so both the
; SSE and AVX lowerings call libm roundf().
define <1 x float> @constrained_vector_round_v1f32() #0 {
; CHECK-LABEL: constrained_vector_round_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq roundf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_round_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq roundf
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
                               <1 x float> <float 1.5>,
                               metadata !"fpexcept.strict") #0
  ret <1 x float> %round
}
5918
; Strict-exception round of <2 x double>. Both SSE and AVX scalarize into
; two libm round() calls and rebuild the pair with (v)unpcklpd — no
; single-instruction lowering exists for round-half-away-from-zero.
define <2 x double> @constrained_vector_round_v2f64() #0 {
; CHECK-LABEL: constrained_vector_round_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq round
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq round
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_round_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq round
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq round
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
                                <2 x double> <double 1.1, double 1.9>,
                                metadata !"fpexcept.strict") #0
  ret <2 x double> %round
}
5955
; Strict-exception round of <3 x float>. Both lowerings make three roundf()
; calls; SSE recombines with unpcklps/unpcklpd, AVX with vinsertps (including
; a folded-reload insert straight from the spill slot).
define <3 x float> @constrained_vector_round_v3f32() #0 {
; CHECK-LABEL: constrained_vector_round_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq roundf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq roundf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq roundf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_round_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq roundf
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq roundf
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq roundf
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
                              <3 x float> <float 1.5, float 2.5, float 3.5>,
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %round
}
6003
6004
; Strict-exception round of <3 x double>. Three libm round() calls in both
; lowerings; the AVX path keeps a ymm accumulator across calls (32-byte
; spill/reload) and issues vzeroupper before the final call.
define <3 x double> @constrained_vector_round_v3f64() #0 {
; CHECK-LABEL: constrained_vector_round_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq round
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq round
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq round
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_round_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq round
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq round
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq round
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
                          <3 x double> <double 1.1, double 1.9, double 1.5>,
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %round
}
6055
; Strict-exception trunc of <1 x float>. Baseline SSE calls libm truncf();
; AVX selects vroundss with imm 11 (round toward zero, precision exception
; suppressed).
define <1 x float> @constrained_vector_trunc_v1f32() #0 {
; CHECK-LABEL: constrained_vector_trunc_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq truncf
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_trunc_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
                               <1 x float> <float 1.5>,
                               metadata !"fpexcept.strict") #0
  ret <1 x float> %trunc
}
6078
; Strict-exception trunc of <2 x double>. Baseline SSE scalarizes into two
; libm trunc() calls joined by unpcklpd; AVX folds to one vroundpd ($11 =
; round toward zero) with a memory operand.
define <2 x double> @constrained_vector_trunc_v2f64() #0 {
; CHECK-LABEL: constrained_vector_trunc_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq trunc
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq trunc
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_trunc_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vroundpd $11, {{.*}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
                                <2 x double> <double 1.1, double 1.9>,
                                metadata !"fpexcept.strict") #0
  ret <2 x double> %trunc
}
6105
; Strict-exception trunc of <3 x float>. Baseline SSE makes three truncf()
; calls and recombines with unpcklps/unpcklpd; AVX uses three scalar
; vroundss ($11) plus vinsertps lane inserts.
define <3 x float> @constrained_vector_trunc_v3f32() #0 {
; CHECK-LABEL: constrained_vector_trunc_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq truncf
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq truncf
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq truncf
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_trunc_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $11, %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $11, %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
entry:
  %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
                              <3 x float> <float 1.5, float 2.5, float 3.5>,
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %trunc
}
6145
; Strict-exception trunc of <3 x double>. Baseline SSE issues three libm
; trunc() calls (third lane returned on the x87 stack); AVX splits into a
; scalar vroundsd plus a vroundpd on the low pair, both with imm 11.
define <3 x double> @constrained_vector_trunc_v3f64() #0 {
; CHECK-LABEL: constrained_vector_trunc_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq trunc
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq trunc
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    callq trunc
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_trunc_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vroundpd $11, {{.*}}(%rip), %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
                          <3 x double> <double 1.1, double 1.9, double 1.5>,
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %trunc
}
6183
; Strict sitofp i32 -> f64 on a single-element vector: both lowerings reduce
; to one (v)cvtsi2sd from the GPR argument (edi).
define <1 x double> @constrained_vector_sitofp_v1f64_v1i32(<1 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v1f64_v1i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtsi2sd %edi, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v1f64_v1i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtsi2sd %edi, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <1 x double>
           @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <1 x double> %result
}
6201
; Strict sitofp i32 -> f32 on a single-element vector: one (v)cvtsi2ss from
; the GPR argument (edi).
define <1 x float> @constrained_vector_sitofp_v1f32_v1i32(<1 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v1f32_v1i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtsi2ss %edi, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v1f32_v1i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <1 x float>
           @llvm.experimental.constrained.sitofp.v1f32.v1i32(<1 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <1 x float> %result
}
6219
; Strict sitofp i64 -> f64 on a single-element vector: one (v)cvtsi2sd from
; the 64-bit GPR argument (rdi).
define <1 x double> @constrained_vector_sitofp_v1f64_v1i64(<1 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v1f64_v1i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtsi2sd %rdi, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v1f64_v1i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtsi2sd %rdi, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <1 x double>
           @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <1 x double> %result
}
6237
; Strict sitofp i64 -> f32 on a single-element vector: one (v)cvtsi2ss from
; the 64-bit GPR argument (rdi).
define <1 x float> @constrained_vector_sitofp_v1f32_v1i64(<1 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v1f32_v1i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtsi2ss %rdi, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v1f32_v1i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <1 x float>
           @llvm.experimental.constrained.sitofp.v1f32.v1i64(<1 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <1 x float> %result
}
6255
; Strict sitofp <2 x i32> -> <2 x double>: maps directly onto a single
; (v)cvtdq2pd, which converts the low two dword lanes.
define <2 x double> @constrained_vector_sitofp_v2f64_v2i32(<2 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v2f64_v2i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtdq2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v2f64_v2i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <2 x double>
           @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}
6273
; Strict sitofp <2 x i32> -> <2 x float>: the upper lanes are first zeroed
; via (v)movq so the full-width (v)cvtdq2ps cannot raise spurious exceptions
; on garbage in the unused lanes.
define <2 x float> @constrained_vector_sitofp_v2f32_v2i32(<2 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT:    cvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <2 x float>
           @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}
6293
; Strict sitofp <2 x i64> -> <2 x double>. Pre-AVX512DQ targets have no
; packed qword conversion, so each lane is extracted to a GPR and converted
; with cvtsi2sd, then repacked with (v)unpcklpd. AVX512DQ uses vcvtqq2pd on
; a zmm-widened operand instead.
define <2 x double> @constrained_vector_sitofp_v2f64_v2i64(<2 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v2f64_v2i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    cvtsi2sd %rax, %xmm1
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2sd %rax, %xmm0
; CHECK-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT:    movapd %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_sitofp_v2f64_v2i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_sitofp_v2f64_v2i64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_sitofp_v2f64_v2i64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
entry:
  %result = call <2 x double>
           @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}
6339
; Strict sitofp <2 x i64> -> <2 x float>: scalarized through GPRs with
; (v)cvtsi2ss per lane, then repacked (unpcklps on SSE, vinsertps on AVX).
define <2 x float> @constrained_vector_sitofp_v2f32_v2i64(<2 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    cvtsi2ss %rax, %xmm1
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2ss %rax, %xmm0
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpextrq $1, %xmm0, %rax
; AVX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-NEXT:    retq
entry:
  %result = call <2 x float>
           @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}
6368
; Strict sitofp <3 x i32> -> <3 x double>: scalarized per lane with
; (v)cvtsi2sd. Baseline SSE returns the third lane on the x87 stack
; (movsd + fldl + wait); AVX rebuilds a ymm with vunpcklpd/vinsertf128.
define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v3f64_v3i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movd %xmm0, %eax
; CHECK-NEXT:    cvtsi2sd %eax, %xmm2
; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; CHECK-NEXT:    movd %xmm1, %eax
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsi2sd %eax, %xmm1
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movd %xmm0, %eax
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2sd %eax, %xmm0
; CHECK-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movapd %xmm2, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v3f64_v3i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vextractps $1, %xmm0, %eax
; AVX-NEXT:    vcvtsi2sd %eax, %xmm1, %xmm1
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vcvtsi2sd %eax, %xmm2, %xmm2
; AVX-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vpextrd $2, %xmm0, %eax
; AVX-NEXT:    vcvtsi2sd %eax, %xmm3, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %result = call <3 x double>
           @llvm.experimental.constrained.sitofp.v3f64.v3i32(<3 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <3 x double> %result
}
6406
; Strict sitofp <3 x i32> -> <3 x float>: three scalar (v)cvtsi2ss
; conversions, repacked with unpcklps/movlhps (SSE) or vinsertps (AVX).
define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movd %xmm0, %eax
; CHECK-NEXT:    cvtsi2ss %eax, %xmm1
; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
; CHECK-NEXT:    movd %xmm2, %eax
; CHECK-NEXT:    xorps %xmm2, %xmm2
; CHECK-NEXT:    cvtsi2ss %eax, %xmm2
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movd %xmm0, %eax
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2ss %eax, %xmm0
; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v3f32_v3i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vextractps $1, %xmm0, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm1, %xmm1
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm2, %xmm2
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX-NEXT:    vpextrd $2, %xmm0, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm3, %xmm0
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
entry:
  %result = call <3 x float>
           @llvm.experimental.constrained.sitofp.v3f32.v3i32(<3 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <3 x float> %result
}
6443
; Strict sitofp <3 x i64> -> <3 x double>. Baseline SSE receives the three
; i64 lanes in GPRs (rdi/rsi/rdx) and converts each with cvtsi2sd, returning
; the third lane via the x87 stack; AVX extracts lanes from the ymm argument
; (AVX1 vextractf128 vs AVX512 vextracti128) and repacks with vinsertf128.
define <3 x double> @constrained_vector_sitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v3f64_v3i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtsi2sd %rsi, %xmm1
; CHECK-NEXT:    cvtsi2sd %rdi, %xmm0
; CHECK-NEXT:    cvtsi2sd %rdx, %xmm2
; CHECK-NEXT:    movsd %xmm2, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_sitofp_v3f64_v3i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_sitofp_v3f64_v3i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
entry:
  %result = call <3 x double>
           @llvm.experimental.constrained.sitofp.v3f64.v3i64(<3 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <3 x double> %result
}
6487
; Strict sitofp <3 x i64> -> <3 x float>. Baseline SSE converts the GPR
; lanes (rdi/rsi/rdx) with cvtsi2ss and packs via unpcklps/movlhps; AVX
; extracts lanes from the ymm argument, packs with vinsertps, and must emit
; vzeroupper before returning an xmm result.
define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtsi2ss %rsi, %xmm1
; CHECK-NEXT:    cvtsi2ss %rdi, %xmm0
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm1
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; AVX512-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %result = call <3 x float>
           @llvm.experimental.constrained.sitofp.v3f32.v3i64(<3 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <3 x float> %result
}
6533
; Strict sitofp: <4 x i32> -> <4 x double>. Expected to lower to cvtdq2pd
; (SSE, split into two halves) or a single vcvtdq2pd (AVX+). CHECK lines are
; autogenerated by update_llc_test_checks.py.
define <4 x double> @constrained_vector_sitofp_v4f64_v4i32(<4 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtdq2pd %xmm0, %xmm2
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    cvtdq2pd %xmm0, %xmm1
; CHECK-NEXT:    movaps %xmm2, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v4f64_v4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT:    retq
entry:
  %result = call <4 x double>
           @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
6554
; Strict sitofp: <4 x i32> -> <4 x float>. The natural single-instruction
; case (cvtdq2ps / vcvtdq2ps). CHECK lines are autogenerated by
; update_llc_test_checks.py.
define <4 x float> @constrained_vector_sitofp_v4f32_v4i32(<4 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v4f32_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v4f32_v4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <4 x float>
           @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}
6572
; Strict sitofp: <4 x i64> -> <4 x double>. Without AVX512DQ this is
; scalarized through cvtsi2sd; with AVX512DQ it uses vcvtqq2pd on a
; zmm-widened operand. CHECK lines are autogenerated by
; update_llc_test_checks.py.
define <4 x double> @constrained_vector_sitofp_v4f64_v4i64(<4 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    cvtsi2sd %rax, %xmm2
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2sd %rax, %xmm0
; CHECK-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    cvtsi2sd %rax, %xmm3
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2sd %rax, %xmm0
; CHECK-NEXT:    unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; CHECK-NEXT:    movapd %xmm2, %xmm0
; CHECK-NEXT:    movapd %xmm3, %xmm1
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_sitofp_v4f64_v4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_sitofp_v4f64_v4i64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpextrq $1, %xmm1, %rax
; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512F-NEXT:    vmovq %xmm1, %rax
; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_sitofp_v4f64_v4i64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
entry:
  %result = call <4 x double>
           @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
6639
; Strict sitofp: <4 x i64> -> <4 x float>. Scalarized via cvtsi2ss except
; with AVX512DQ, where vcvtqq2ps handles the whole vector. CHECK lines are
; autogenerated by update_llc_test_checks.py.
define <4 x float> @constrained_vector_sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v4f32_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    cvtsi2ss %rax, %xmm2
; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsi2ss %rax, %xmm1
; CHECK-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsi2ss %rax, %xmm1
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2ss %rax, %xmm0
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_sitofp_v4f32_v4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_sitofp_v4f32_v4i64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_sitofp_v4f32_v4i64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
entry:
  %result = call <4 x float>
           @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}
6710
; Strict uitofp: <1 x i32> -> <1 x double>. Pre-AVX512 zero-extends the i32
; to 64 bits and uses the signed convert; AVX512 has vcvtusi2sd. CHECK lines
; are autogenerated by update_llc_test_checks.py.
define <1 x double> @constrained_vector_uitofp_v1f64_v1i32(<1 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v1f64_v1i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    cvtsi2sd %rax, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    movl %edi, %eax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvtusi2sd %edi, %xmm0, %xmm0
; AVX512-NEXT:    retq
entry:
  %result = call <1 x double>
           @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <1 x double> %result
}
6735
; Strict uitofp: <1 x i32> -> <1 x float>. Same zero-extend-then-signed-
; convert strategy as the f64 variant; AVX512 uses vcvtusi2ss. CHECK lines
; are autogenerated by update_llc_test_checks.py.
define <1 x float> @constrained_vector_uitofp_v1f32_v1i32(<1 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    cvtsi2ss %rax, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    movl %edi, %eax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvtusi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT:    retq
entry:
  %result = call <1 x float>
           @llvm.experimental.constrained.uitofp.v1f32.v1i32(<1 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <1 x float> %result
}
6760
; Strict uitofp: <1 x i64> -> <1 x double>. Pre-AVX512 uses the classic
; shift/or/compensate sequence for values with the sign bit set (halve,
; convert signed, then double the result); AVX512 has vcvtusi2sd. CHECK
; lines are autogenerated by update_llc_test_checks.py.
define <1 x double> @constrained_vector_uitofp_v1f64_v1i64(<1 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v1f64_v1i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shrq %rax
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    andl $1, %ecx
; CHECK-NEXT:    orq %rax, %rcx
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    cmovnsq %rdi, %rcx
; CHECK-NEXT:    cvtsi2sd %rcx, %xmm0
; CHECK-NEXT:    jns .LBB169_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    addsd %xmm0, %xmm0
; CHECK-NEXT:  .LBB169_2: # %entry
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    movq %rdi, %rax
; AVX1-NEXT:    shrq %rax
; AVX1-NEXT:    movl %edi, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    orq %rax, %rcx
; AVX1-NEXT:    testq %rdi, %rdi
; AVX1-NEXT:    cmovnsq %rdi, %rcx
; AVX1-NEXT:    vcvtsi2sd %rcx, %xmm0, %xmm0
; AVX1-NEXT:    jns .LBB169_2
; AVX1-NEXT:  # %bb.1:
; AVX1-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
; AVX1-NEXT:  .LBB169_2: # %entry
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvtusi2sd %rdi, %xmm0, %xmm0
; AVX512-NEXT:    retq
entry:
  %result = call <1 x double>
           @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <1 x double> %result
}
6805
; Strict uitofp: <1 x i64> -> <1 x float>. Same halve/convert/double
; compensation as the f64 variant; AVX512 has vcvtusi2ss. CHECK lines are
; autogenerated by update_llc_test_checks.py.
define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shrq %rax
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    andl $1, %ecx
; CHECK-NEXT:    orq %rax, %rcx
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    cmovnsq %rdi, %rcx
; CHECK-NEXT:    cvtsi2ss %rcx, %xmm0
; CHECK-NEXT:    jns .LBB170_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    addss %xmm0, %xmm0
; CHECK-NEXT:  .LBB170_2: # %entry
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    movq %rdi, %rax
; AVX1-NEXT:    shrq %rax
; AVX1-NEXT:    movl %edi, %ecx
; AVX1-NEXT:    andl $1, %ecx
; AVX1-NEXT:    orq %rax, %rcx
; AVX1-NEXT:    testq %rdi, %rdi
; AVX1-NEXT:    cmovnsq %rdi, %rcx
; AVX1-NEXT:    vcvtsi2ss %rcx, %xmm0, %xmm0
; AVX1-NEXT:    jns .LBB170_2
; AVX1-NEXT:  # %bb.1:
; AVX1-NEXT:    vaddss %xmm0, %xmm0, %xmm0
; AVX1-NEXT:  .LBB170_2: # %entry
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvtusi2ss %rdi, %xmm0, %xmm0
; AVX512-NEXT:    retq
entry:
  %result = call <1 x float>
           @llvm.experimental.constrained.uitofp.v1f32.v1i64(<1 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <1 x float> %result
}
6850
; Strict uitofp: <2 x i32> -> <2 x double>. SSE/AVX1 use the magic-constant
; trick (OR with 2^52, subtract 2^52); AVX512 widens and uses vcvtudq2pd.
; CHECK lines are autogenerated by update_llc_test_checks.py.
define <2 x double> @constrained_vector_uitofp_v2f64_v2i32(<2 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v2f64_v2i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorpd %xmm1, %xmm1
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; CHECK-NEXT:    orpd %xmm1, %xmm0
; CHECK-NEXT:    subpd %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v2f64_v2i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_uitofp_v2f64_v2i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512-NEXT:    vcvtudq2pd %ymm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %result = call <2 x double>
           @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}
6883
; Strict uitofp: <2 x i32> -> <2 x float>. Converts through f64 using the
; 2^52 magic-constant trick, then narrows with cvtpd2ps; AVX512 widens and
; uses vcvtudq2ps. CHECK lines are autogenerated by update_llc_test_checks.py.
define <2 x float> @constrained_vector_uitofp_v2f32_v2i32(<2 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v2f32_v2i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorpd %xmm1, %xmm1
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; CHECK-NEXT:    orpd %xmm1, %xmm0
; CHECK-NEXT:    subpd %xmm1, %xmm0
; CHECK-NEXT:    cvtpd2ps %xmm0, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %result = call <2 x float>
           @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}
6918
; Strict uitofp: <2 x i64> -> <2 x double>. SSE/AVX1 scalarize each lane
; with the halve/convert/double compensation for the sign-bit-set case;
; AVX512F uses vcvtusi2sd per lane, AVX512DQ a single vcvtuqq2pd. CHECK
; lines are autogenerated by update_llc_test_checks.py.
define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v2f64_v2i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2sd %rdx, %xmm0
; CHECK-NEXT:    jns .LBB173_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    addsd %xmm0, %xmm0
; CHECK-NEXT:  .LBB173_2: # %entry
; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsi2sd %rdx, %xmm1
; CHECK-NEXT:    jns .LBB173_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    addsd %xmm1, %xmm1
; CHECK-NEXT:  .LBB173_4: # %entry
; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v2f64_v2i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq %rcx
; AVX1-NEXT:    movl %eax, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    orq %rcx, %rdx
; AVX1-NEXT:    testq %rax, %rax
; AVX1-NEXT:    cmovnsq %rax, %rdx
; AVX1-NEXT:    vcvtsi2sd %rdx, %xmm1, %xmm1
; AVX1-NEXT:    jns .LBB173_2
; AVX1-NEXT:  # %bb.1:
; AVX1-NEXT:    vaddsd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:  .LBB173_2: # %entry
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq %rcx
; AVX1-NEXT:    movl %eax, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    orq %rcx, %rdx
; AVX1-NEXT:    testq %rax, %rax
; AVX1-NEXT:    cmovnsq %rax, %rdx
; AVX1-NEXT:    vcvtsi2sd %rdx, %xmm2, %xmm0
; AVX1-NEXT:    jns .LBB173_4
; AVX1-NEXT:  # %bb.3:
; AVX1-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
; AVX1-NEXT:  .LBB173_4: # %entry
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_uitofp_v2f64_v2i64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2sd %rax, %xmm1, %xmm1
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2sd %rax, %xmm2, %xmm0
; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_uitofp_v2f64_v2i64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
entry:
  %result = call <2 x double>
           @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}
7009
; Strict uitofp: <2 x i64> -> <2 x float>. SSE scalarizes with the
; halve/convert/double compensation; AVX1 does the compensation in vector
; form with a blend on the sign mask; AVX512 uses vcvtusi2ss per lane.
; CHECK lines are autogenerated by update_llc_test_checks.py.
define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v2f32_v2i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm0
; CHECK-NEXT:    jns .LBB174_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    addss %xmm0, %xmm0
; CHECK-NEXT:  .LBB174_2: # %entry
; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm1
; CHECK-NEXT:    jns .LBB174_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    addss %xmm1, %xmm1
; CHECK-NEXT:  .LBB174_4: # %entry
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm2
; AVX1-NEXT:    vpor %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
; AVX1-NEXT:    vaddps %xmm1, %xmm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512-NEXT:    retq
entry:
  %result = call <2 x float>
           @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}
7079
; Strict uitofp: <3 x i32> -> <3 x double>. The SSE ABI returns the third
; double on the x87 stack (fldl), hence the stack spill in CHECK; AVX512
; uses vcvtusi2sd per element. CHECK lines are autogenerated by
; update_llc_test_checks.py.
define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v3f64_v3i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movd %xmm0, %eax
; CHECK-NEXT:    cvtsi2sd %rax, %xmm2
; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; CHECK-NEXT:    movd %xmm1, %eax
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsi2sd %rax, %xmm1
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movd %xmm0, %eax
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2sd %rax, %xmm0
; CHECK-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movapd %xmm2, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractps $1, %xmm0, %eax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vpextrd $2, %xmm0, %eax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vextractps $1, %xmm0, %eax
; AVX512-NEXT:    vcvtusi2sd %eax, %xmm1, %xmm1
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vcvtusi2sd %eax, %xmm2, %xmm2
; AVX512-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-NEXT:    vpextrd $2, %xmm0, %eax
; AVX512-NEXT:    vcvtusi2sd %eax, %xmm3, %xmm0
; AVX512-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
entry:
  %result = call <3 x double>
           @llvm.experimental.constrained.uitofp.v3f64.v3i32(<3 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <3 x double> %result
}
7129
; Strict uitofp: <3 x i32> -> <3 x float>. Scalarized per element via
; zero-extend + cvtsi2ss (pre-AVX512) or vcvtusi2ss (AVX512). CHECK lines
; are autogenerated by update_llc_test_checks.py.
define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movd %xmm0, %eax
; CHECK-NEXT:    cvtsi2ss %rax, %xmm1
; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
; CHECK-NEXT:    movd %xmm2, %eax
; CHECK-NEXT:    xorps %xmm2, %xmm2
; CHECK-NEXT:    cvtsi2ss %rax, %xmm2
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movd %xmm0, %eax
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2ss %rax, %xmm0
; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractps $1, %xmm0, %eax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT:    vpextrd $2, %xmm0, %eax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vextractps $1, %xmm0, %eax
; AVX512-NEXT:    vcvtusi2ss %eax, %xmm1, %xmm1
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vcvtusi2ss %eax, %xmm2, %xmm2
; AVX512-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512-NEXT:    vpextrd $2, %xmm0, %eax
; AVX512-NEXT:    vcvtusi2ss %eax, %xmm3, %xmm0
; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX512-NEXT:    retq
entry:
  %result = call <3 x float>
           @llvm.experimental.constrained.uitofp.v3f32.v3i32(<3 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <3 x float> %result
}
7178
; Strict uitofp: <3 x i64> -> <3 x double>. Pre-AVX512 each lane uses the
; halve/convert/double compensation; SSE returns the third double via x87
; (fldl). AVX512 uses vcvtusi2sd per lane. CHECK lines are autogenerated by
; update_llc_test_checks.py.
define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v3f64_v3i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shrq %rax
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    andl $1, %ecx
; CHECK-NEXT:    orq %rax, %rcx
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    cmovnsq %rdi, %rcx
; CHECK-NEXT:    cvtsi2sd %rcx, %xmm0
; CHECK-NEXT:    jns .LBB177_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    addsd %xmm0, %xmm0
; CHECK-NEXT:  .LBB177_2: # %entry
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    shrq %rax
; CHECK-NEXT:    movl %esi, %ecx
; CHECK-NEXT:    andl $1, %ecx
; CHECK-NEXT:    orq %rax, %rcx
; CHECK-NEXT:    testq %rsi, %rsi
; CHECK-NEXT:    cmovnsq %rsi, %rcx
; CHECK-NEXT:    cvtsi2sd %rcx, %xmm1
; CHECK-NEXT:    jns .LBB177_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    addsd %xmm1, %xmm1
; CHECK-NEXT:  .LBB177_4: # %entry
; CHECK-NEXT:    movq %rdx, %rax
; CHECK-NEXT:    shrq %rax
; CHECK-NEXT:    movl %edx, %ecx
; CHECK-NEXT:    andl $1, %ecx
; CHECK-NEXT:    orq %rax, %rcx
; CHECK-NEXT:    testq %rdx, %rdx
; CHECK-NEXT:    cmovnsq %rdx, %rcx
; CHECK-NEXT:    cvtsi2sd %rcx, %xmm2
; CHECK-NEXT:    jns .LBB177_6
; CHECK-NEXT:  # %bb.5:
; CHECK-NEXT:    addsd %xmm2, %xmm2
; CHECK-NEXT:  .LBB177_6: # %entry
; CHECK-NEXT:    movsd %xmm2, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq %rcx
; AVX1-NEXT:    movl %eax, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    orq %rcx, %rdx
; AVX1-NEXT:    testq %rax, %rax
; AVX1-NEXT:    cmovnsq %rax, %rdx
; AVX1-NEXT:    vcvtsi2sd %rdx, %xmm1, %xmm1
; AVX1-NEXT:    jns .LBB177_2
; AVX1-NEXT:  # %bb.1:
; AVX1-NEXT:    vaddsd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:  .LBB177_2: # %entry
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq %rcx
; AVX1-NEXT:    movl %eax, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    orq %rcx, %rdx
; AVX1-NEXT:    testq %rax, %rax
; AVX1-NEXT:    cmovnsq %rax, %rdx
; AVX1-NEXT:    vcvtsi2sd %rdx, %xmm2, %xmm2
; AVX1-NEXT:    jns .LBB177_4
; AVX1-NEXT:  # %bb.3:
; AVX1-NEXT:    vaddsd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:  .LBB177_4: # %entry
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq %rcx
; AVX1-NEXT:    movl %eax, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    orq %rcx, %rdx
; AVX1-NEXT:    testq %rax, %rax
; AVX1-NEXT:    cmovnsq %rax, %rdx
; AVX1-NEXT:    vcvtsi2sd %rdx, %xmm3, %xmm0
; AVX1-NEXT:    jns .LBB177_6
; AVX1-NEXT:  # %bb.5:
; AVX1-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
; AVX1-NEXT:  .LBB177_6: # %entry
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512-NEXT:    vcvtusi2sd %rax, %xmm1, %xmm1
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vcvtusi2sd %rax, %xmm2, %xmm2
; AVX512-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm0
; AVX512-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
entry:
  %result = call <3 x double>
           @llvm.experimental.constrained.uitofp.v3f64.v3i64(<3 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <3 x double> %result
}
7288
; Strict-FP (round.dynamic / fpexcept.strict) unsigned <3 x i64> -> <3 x float>
; conversion. Without AVX-512 there is no 64-bit unsigned convert instruction,
; so each lane is lowered as: if the value is negative as signed, halve it
; (shift right, OR-ing in the low bit for rounding), convert signed, then
; double the result; otherwise convert directly (cmovns picks the raw value).
; NOTE(review): the assertion comments below are autogenerated by
; utils/update_llc_test_checks.py — regenerate them rather than hand-editing.
define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    shrq %rax
; CHECK-NEXT:    movl %esi, %ecx
; CHECK-NEXT:    andl $1, %ecx
; CHECK-NEXT:    orq %rax, %rcx
; CHECK-NEXT:    testq %rsi, %rsi
; CHECK-NEXT:    cmovnsq %rsi, %rcx
; CHECK-NEXT:    cvtsi2ss %rcx, %xmm1
; CHECK-NEXT:    jns .LBB178_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    addss %xmm1, %xmm1
; CHECK-NEXT:  .LBB178_2: # %entry
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shrq %rax
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    andl $1, %ecx
; CHECK-NEXT:    orq %rax, %rcx
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    cmovnsq %rdi, %rcx
; CHECK-NEXT:    cvtsi2ss %rcx, %xmm0
; CHECK-NEXT:    jns .LBB178_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    addss %xmm0, %xmm0
; CHECK-NEXT:  .LBB178_4: # %entry
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    movq %rdx, %rax
; CHECK-NEXT:    shrq %rax
; CHECK-NEXT:    movl %edx, %ecx
; CHECK-NEXT:    andl $1, %ecx
; CHECK-NEXT:    orq %rax, %rcx
; CHECK-NEXT:    testq %rdx, %rdx
; CHECK-NEXT:    cmovnsq %rdx, %rcx
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsi2ss %rcx, %xmm1
; CHECK-NEXT:    jns .LBB178_6
; CHECK-NEXT:  # %bb.5:
; CHECK-NEXT:    addss %xmm1, %xmm1
; CHECK-NEXT:  .LBB178_6: # %entry
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq %rcx
; AVX1-NEXT:    movl %eax, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    orq %rcx, %rdx
; AVX1-NEXT:    testq %rax, %rax
; AVX1-NEXT:    cmovnsq %rax, %rdx
; AVX1-NEXT:    vcvtsi2ss %rdx, %xmm1, %xmm1
; AVX1-NEXT:    jns .LBB178_2
; AVX1-NEXT:  # %bb.1:
; AVX1-NEXT:    vaddss %xmm1, %xmm1, %xmm1
; AVX1-NEXT:  .LBB178_2: # %entry
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq %rcx
; AVX1-NEXT:    movl %eax, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    orq %rcx, %rdx
; AVX1-NEXT:    testq %rax, %rax
; AVX1-NEXT:    cmovnsq %rax, %rdx
; AVX1-NEXT:    vcvtsi2ss %rdx, %xmm2, %xmm2
; AVX1-NEXT:    jns .LBB178_4
; AVX1-NEXT:  # %bb.3:
; AVX1-NEXT:    vaddss %xmm2, %xmm2, %xmm2
; AVX1-NEXT:  .LBB178_4: # %entry
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq %rcx
; AVX1-NEXT:    movl %eax, %edx
; AVX1-NEXT:    andl $1, %edx
; AVX1-NEXT:    orq %rcx, %rdx
; AVX1-NEXT:    testq %rax, %rax
; AVX1-NEXT:    cmovnsq %rax, %rdx
; AVX1-NEXT:    vcvtsi2ss %rdx, %xmm3, %xmm0
; AVX1-NEXT:    jns .LBB178_6
; AVX1-NEXT:  # %bb.5:
; AVX1-NEXT:    vaddss %xmm0, %xmm0, %xmm0
; AVX1-NEXT:  .LBB178_6: # %entry
; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  ; Rounding mode is read from the environment; FP exceptions must not be
  ; spuriously raised or hidden (strict semantics).
  %result = call <3 x float>
           @llvm.experimental.constrained.uitofp.v3f32.v3i64(<3 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <3 x float> %result
}
7400
; Strict-FP unsigned <4 x i32> -> <4 x double> conversion. Every u32 fits a
; double exactly, so SSE/AVX lower it with the classic magic-number trick:
; OR the zero-extended integer into the mantissa of 2^52
; (4.503599627370496E+15) and subtract the same constant. AVX-512 uses the
; native vcvtudq2pd instead.
; NOTE(review): the assertion comments below are autogenerated by
; utils/update_llc_test_checks.py — regenerate them rather than hand-editing.
define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v4f64_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorpd %xmm2, %xmm2
; CHECK-NEXT:    movapd %xmm0, %xmm1
; CHECK-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; CHECK-NEXT:    movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15]
; CHECK-NEXT:    orpd %xmm3, %xmm1
; CHECK-NEXT:    subpd %xmm3, %xmm1
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT:    orpd %xmm3, %xmm0
; CHECK-NEXT:    subpd %xmm3, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT:    vorpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_uitofp_v4f64_v4i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovaps %xmm0, %xmm0
; AVX512-NEXT:    vcvtudq2pd %ymm0, %zmm0
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT:    retq
entry:
  %result = call <4 x double>
           @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
7439
; Strict-FP unsigned <4 x i32> -> <4 x float> conversion. A u32 does not fit
; a float's 24-bit mantissa, so SSE/AVX split each lane into low and high
; 16-bit halves, convert each half exactly via magic-constant OR/SUB, and add
; the two partial results. AVX-512 uses the native vcvtudq2ps.
; NOTE(review): the assertion comments below are autogenerated by
; utils/update_llc_test_checks.py — regenerate them rather than hand-editing.
define <4 x float> @constrained_vector_uitofp_v4f32_v4i32(<4 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; CHECK-NEXT:    pand %xmm0, %xmm1
; CHECK-NEXT:    por {{.*}}(%rip), %xmm1
; CHECK-NEXT:    psrld $16, %xmm0
; CHECK-NEXT:    por {{.*}}(%rip), %xmm0
; CHECK-NEXT:    subps {{.*}}(%rip), %xmm0
; CHECK-NEXT:    addps %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_uitofp_v4f32_v4i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovaps %xmm0, %xmm0
; AVX512-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %result = call <4 x float>
           @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}
7475
; Strict-FP unsigned <4 x i64> -> <4 x double> conversion. SSE uses the
; per-lane halve/convert/double trick for values with the sign bit set; AVX1
; splits each u64 into 32-bit halves, converts both, and recombines with a
; multiply-add; AVX512F converts lane-by-lane with vcvtusi2sd; AVX512DQ has
; the packed vcvtuqq2pd.
; NOTE(review): the assertion comments below are autogenerated by
; utils/update_llc_test_checks.py — regenerate them rather than hand-editing.
define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v4f64_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm2
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2sd %rdx, %xmm0
; CHECK-NEXT:    jns .LBB181_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    addsd %xmm0, %xmm0
; CHECK-NEXT:  .LBB181_2: # %entry
; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; CHECK-NEXT:    movq %xmm2, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    cvtsi2sd %rdx, %xmm3
; CHECK-NEXT:    jns .LBB181_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    addsd %xmm3, %xmm3
; CHECK-NEXT:  .LBB181_4: # %entry
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm2, %xmm2
; CHECK-NEXT:    cvtsi2sd %rdx, %xmm2
; CHECK-NEXT:    jns .LBB181_6
; CHECK-NEXT:  # %bb.5:
; CHECK-NEXT:    addsd %xmm2, %xmm2
; CHECK-NEXT:  .LBB181_6: # %entry
; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsi2sd %rdx, %xmm1
; CHECK-NEXT:    jns .LBB181_8
; CHECK-NEXT:  # %bb.7:
; CHECK-NEXT:    addsd %xmm1, %xmm1
; CHECK-NEXT:  .LBB181_8: # %entry
; CHECK-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; CHECK-NEXT:    movapd %xmm2, %xmm1
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrq $1, %xmm2, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm3
; AVX1-NEXT:    vmovq %xmm2, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm2
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpsrlq $32, %xmm2, %xmm2
; AVX1-NEXT:    vpextrq $1, %xmm2, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
; AVX1-NEXT:    vmovq %xmm2, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm2
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm0
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm0
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmulpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_uitofp_v4f64_v4i64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpextrq $1, %xmm1, %rax
; AVX512F-NEXT:    vcvtusi2sd %rax, %xmm2, %xmm2
; AVX512F-NEXT:    vmovq %xmm1, %rax
; AVX512F-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm1
; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm2
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm0
; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_uitofp_v4f64_v4i64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
entry:
  %result = call <4 x double>
           @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
7605
; Strict-FP unsigned <4 x i64> -> <4 x float> conversion. SSE applies the
; scalar halve/convert/double sequence per lane; AVX1 computes a vectorized
; "value or halved value" select, converts signed, then doubles the lanes
; that were halved via vblendvps; AVX512F converts lane-by-lane with
; vcvtusi2ss; AVX512DQ has the packed vcvtuqq2ps.
; NOTE(review): the assertion comments below are autogenerated by
; utils/update_llc_test_checks.py — regenerate them rather than hand-editing.
define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm2
; CHECK-NEXT:    jns .LBB182_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    addss %xmm2, %xmm2
; CHECK-NEXT:  .LBB182_2: # %entry
; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm3
; CHECK-NEXT:    jns .LBB182_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    addss %xmm3, %xmm3
; CHECK-NEXT:  .LBB182_4: # %entry
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm1
; CHECK-NEXT:    jns .LBB182_6
; CHECK-NEXT:  # %bb.5:
; CHECK-NEXT:    addss %xmm1, %xmm1
; CHECK-NEXT:  .LBB182_6: # %entry
; CHECK-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm0
; CHECK-NEXT:    jns .LBB182_8
; CHECK-NEXT:  # %bb.7:
; CHECK-NEXT:    addss %xmm0, %xmm0
; CHECK-NEXT:  .LBB182_8: # %entry
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpsrlq $1, %xmm2, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:    vandpd {{.*}}(%rip), %ymm0, %ymm3
; AVX1-NEXT:    vorpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm4
; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm1
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; AVX1-NEXT:    vaddps %xmm1, %xmm1, %xmm3
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vblendvps %xmm0, %xmm3, %xmm1, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm2
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
entry:
  %result = call <4 x float>
           @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}
7729
7730; Simple test to make sure we don't fuse vselect+strict_fadd into a masked operation.
; Under strictfp the fadd must be executed unconditionally (its FP side
; effects are observable), so the select stays a separate blend/merge after
; the add on every target rather than becoming a single masked add.
; NOTE(review): the assertion comments below are autogenerated by
; utils/update_llc_test_checks.py — regenerate them rather than hand-editing.
define <16 x float> @vpaddd_mask_test(<16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone strictfp {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm10, %xmm10
; CHECK-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; CHECK-NEXT:    pcmpeqd %xmm10, %xmm8
; CHECK-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; CHECK-NEXT:    pcmpeqd %xmm10, %xmm9
; CHECK-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; CHECK-NEXT:    pcmpeqd %xmm10, %xmm11
; CHECK-NEXT:    pcmpeqd {{[0-9]+}}(%rsp), %xmm10
; CHECK-NEXT:    addps %xmm3, %xmm7
; CHECK-NEXT:    addps %xmm2, %xmm6
; CHECK-NEXT:    addps %xmm1, %xmm5
; CHECK-NEXT:    addps %xmm0, %xmm4
; CHECK-NEXT:    andps %xmm10, %xmm0
; CHECK-NEXT:    andnps %xmm4, %xmm10
; CHECK-NEXT:    orps %xmm10, %xmm0
; CHECK-NEXT:    andps %xmm11, %xmm1
; CHECK-NEXT:    andnps %xmm5, %xmm11
; CHECK-NEXT:    orps %xmm11, %xmm1
; CHECK-NEXT:    andps %xmm9, %xmm2
; CHECK-NEXT:    andnps %xmm6, %xmm9
; CHECK-NEXT:    orps %xmm9, %xmm2
; CHECK-NEXT:    andps %xmm8, %xmm3
; CHECK-NEXT:    andnps %xmm7, %xmm8
; CHECK-NEXT:    orps %xmm8, %xmm3
; CHECK-NEXT:    retq
;
; AVX1-LABEL: vpaddd_mask_test:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
; AVX1-NEXT:    vpxor %xmm7, %xmm7, %xmm7
; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm6, %xmm6
; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm5, %xmm5
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm6
; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm6, %xmm6
; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm4, %ymm4
; AVX1-NEXT:    vaddps %ymm3, %ymm1, %ymm3
; AVX1-NEXT:    vaddps %ymm2, %ymm0, %ymm2
; AVX1-NEXT:    vblendvps %ymm4, %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vblendvps %ymm5, %ymm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: vpaddd_mask_test:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vptestmd %zmm2, %zmm2, %k1
; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0 {%k1}
; AVX512-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %i, <16 x float> %j, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ; Lanes with a zero mask keep the original %i value.
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %i
  ret <16 x float> %r
}
7788declare  <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata)
7789
7790attributes #0 = { strictfp }
7791
7792; Single width declarations
7793declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
7794declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
7795declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
7796declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
7797declare <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double>, <2 x double>, metadata, metadata)
7798declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
7799declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata)
7800declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata)
7801declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
7802declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
7803declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)
7804declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata)
7805declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata)
7806declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata)
7807declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata)
7808declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
7809declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
7810declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata)
7811declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata)
7812declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float>, metadata)
7813declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float>, metadata)
7814declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double>, metadata)
7815declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)
7816declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float>, metadata)
7817declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float>, metadata)
7818declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double>, metadata)
7819declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata)
7820declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
7821declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
7822declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata)
7823declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata)
7824declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata)
7825declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata)
7826declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
7827declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
7828declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)
7829declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)
7830declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
7831declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
7832declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)
7833declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)
7834
7835; Scalar width declarations
7836declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
7837declare <1 x float> @llvm.experimental.constrained.fsub.v1f32(<1 x float>, <1 x float>, metadata, metadata)
7838declare <1 x float> @llvm.experimental.constrained.fmul.v1f32(<1 x float>, <1 x float>, metadata, metadata)
7839declare <1 x float> @llvm.experimental.constrained.fdiv.v1f32(<1 x float>, <1 x float>, metadata, metadata)
7840declare <1 x float> @llvm.experimental.constrained.frem.v1f32(<1 x float>, <1 x float>, metadata, metadata)
7841declare <1 x float> @llvm.experimental.constrained.sqrt.v1f32(<1 x float>, metadata, metadata)
7842declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x float>, metadata, metadata)
7843declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata)
7844declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata)
7845declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata)
7846declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata)
7847declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata)
7848declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata)
7849declare <1 x float> @llvm.experimental.constrained.log10.v1f32(<1 x float>, metadata, metadata)
7850declare <1 x float> @llvm.experimental.constrained.log2.v1f32(<1 x float>, metadata, metadata)
7851declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metadata, metadata)
7852declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
7853declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata)
7854declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata)
7855declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f32(<1 x float>, metadata)
7856declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f32(<1 x float>, metadata)
7857declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(<1 x double>, metadata)
7858declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(<1 x double>, metadata)
7859declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f32(<1 x float>, metadata)
7860declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f32(<1 x float>, metadata)
7861declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(<1 x double>, metadata)
7862declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(<1 x double>, metadata)
7863declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)
7864declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata)
7865declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata)
7866declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata)
7867declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata)
7868declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata)
7869declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32>, metadata, metadata)
7870declare <1 x float> @llvm.experimental.constrained.sitofp.v1f32.v1i32(<1 x i32>, metadata, metadata)
7871declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64>, metadata, metadata)
7872declare <1 x float> @llvm.experimental.constrained.sitofp.v1f32.v1i64(<1 x i64>, metadata, metadata)
7873declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32>, metadata, metadata)
7874declare <1 x float> @llvm.experimental.constrained.uitofp.v1f32.v1i32(<1 x i32>, metadata, metadata)
7875declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64>, metadata, metadata)
7876declare <1 x float> @llvm.experimental.constrained.uitofp.v1f32.v1i64(<1 x i64>, metadata, metadata)
7877
; Illegal width declarations: <3 x ...> is not a native vector width on x86,
; so these exercise type legalization of the constrained intrinsics.
; NOTE(review): "illegal width" is the file's own label; the legalization
; strategy (widen vs. split) is decided by the backend per type.
;
; Binary arithmetic: (lhs, rhs, rounding metadata, exception metadata).
declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fdiv.v3f32(<3 x float>, <3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.fdiv.v3f64(<3 x double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.frem.v3f32(<3 x float>, <3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.frem.v3f64(<3 x double>, <3 x double>, metadata, metadata)
; Unary math ops: (operand, rounding metadata, exception metadata).
declare <3 x float> @llvm.experimental.constrained.sqrt.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.sqrt.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.pow.v3f32(<3 x float>, <3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.pow.v3f64(<3 x double>, <3 x double>, metadata, metadata)
; powi takes a scalar i32 exponent, not a vector.
declare <3 x float> @llvm.experimental.constrained.powi.v3f32(<3 x float>, i32, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.powi.v3f64(<3 x double>, i32, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.exp2.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.log.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.log.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.log10.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.log10.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.log2.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.log2.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.rint.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.rint.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(<3 x double>, metadata, metadata)
; maxnum/minnum are rounding-mode independent: exception metadata only.
declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3 x float>, metadata)
declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata)
declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata)
declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata)
; fp -> int conversions truncate toward zero: exception metadata only.
declare <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32(<3 x float>, metadata)
declare <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f32(<3 x float>, metadata)
declare <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64(<3 x double>, metadata)
declare <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f64(<3 x double>, metadata)
declare <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32(<3 x float>, metadata)
declare <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f32(<3 x float>, metadata)
declare <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64(<3 x double>, metadata)
declare <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f64(<3 x double>, metadata)
; fp precision changes: fptrunc may round (two metadata); fpext is exact (one).
declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata)
; Directed rounding to integral values: exception metadata only.
declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata)
declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata)
declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata)
declare <3 x double> @llvm.experimental.constrained.floor.v3f64(<3 x double>, metadata)
declare <3 x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, metadata)
declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata)
declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata)
declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata)
; int -> fp conversions: rounding + exception metadata.
declare <3 x double> @llvm.experimental.constrained.sitofp.v3f64.v3i32(<3 x i32>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.sitofp.v3f32.v3i32(<3 x i32>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.sitofp.v3f64.v3i64(<3 x i64>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.sitofp.v3f32.v3i64(<3 x i64>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.uitofp.v3f64.v3i32(<3 x i32>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.uitofp.v3f32.v3i32(<3 x i32>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.uitofp.v3f64.v3i64(<3 x i64>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.uitofp.v3f32.v3i64(<3 x i64>, metadata, metadata)
7943
; Double width declarations: <4 x double> (256-bit) and 4-wide conversions.
; These need two 128-bit registers without AVX, one YMM register with it
; (see the AVX RUN lines at the top of the file).
;
; Binary arithmetic: (lhs, rhs, rounding metadata, exception metadata).
declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.frem.v4f64(<4 x double>, <4 x double>, metadata, metadata)
; Unary math ops: (operand, rounding metadata, exception metadata).
declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x double>, metadata, metadata)
; powi takes a scalar i32 exponent, not a vector.
declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
; maxnum/minnum are rounding-mode independent: exception metadata only.
declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata)
declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata)
; fp -> int conversions truncate toward zero: exception metadata only.
declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float>, metadata)
declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float>, metadata)
declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(<4 x double>, metadata)
declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double>, metadata)
declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float>, metadata)
declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float>, metadata)
declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double>, metadata)
declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(<4 x double>, metadata)
; fp precision changes: fptrunc may round (two metadata); fpext is exact (one).
declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
; Directed rounding to integral values: exception metadata only.
declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata)
declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata)
declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata)
declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata)
; int -> fp conversions: rounding + exception metadata.
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
7986
7987