; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2  < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx  < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

declare float @fmaxf(float, float)
declare double @fmax(double, double)
declare x86_fp80 @fmaxl(x86_fp80, x86_fp80)
declare float @llvm.maxnum.f32(float, float)
declare double @llvm.maxnum.f64(double, double)
declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80)

declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)

; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.

define float @test_fmaxf(float %x, float %y) {
; SSE-LABEL: test_fmaxf:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    cmpunordss %xmm0, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andps %xmm1, %xmm3
; SSE-NEXT:    maxss %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm3, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_fmaxf:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call float @fmaxf(float %x, float %y) readnone
  ret float %z
}
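
; Note on the scalar lowering above: maxss/maxsd return their second source
; operand whenever either input is NaN, which is not the maxnum semantic
; (return the non-NaN operand). The backend therefore commutes the operands
; (so a NaN %y yields %x) and then selects %y via a cmpunordss mask on %x,
; using and/andn/or under SSE or vblendvps under AVX. The extra SSE movaps
; copies come from the destructive two-operand encoding, which must preserve
; %x and the mask across the select; the vector tests below do the same
; select with a single move, hence the FIXME.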

define float @test_fmaxf_minsize(float %x, float %y) minsize {
; CHECK-LABEL: test_fmaxf_minsize:
; CHECK:       # %bb.0:
; CHECK-NEXT:    jmp fmaxf # TAILCALL
  %z = call float @fmaxf(float %x, float %y) readnone
  ret float %z
}
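
; Under minsize the inline max+blend expansion is not worth the code size,
; so the libcall is emitted as a bare tail call to fmaxf instead.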

; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.

define double @test_fmax(double %x, double %y) {
; SSE-LABEL: test_fmax:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    cmpunordsd %xmm0, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm3
; SSE-NEXT:    andpd %xmm1, %xmm3
; SSE-NEXT:    maxsd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm1, %xmm2
; SSE-NEXT:    orpd %xmm3, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_fmax:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxsd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call double @fmax(double %x, double %y) readnone
  ret double %z
}

define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) {
; CHECK-LABEL: test_fmaxl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt (%rsp)
; CHECK-NEXT:    callq fmaxl
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %z = call x86_fp80 @fmaxl(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}
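
; x86_fp80 has no SSE/AVX max instruction, so this is lowered as a plain
; libcall: the x87 arguments are stored to the outgoing stack slots and
; fmaxl is called. The llvm.maxnum.f80 test below gets identical code.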

define float @test_intrinsic_fmaxf(float %x, float %y) {
; SSE-LABEL: test_intrinsic_fmaxf:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    cmpunordss %xmm0, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andps %xmm1, %xmm3
; SSE-NEXT:    maxss %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm3, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmaxf:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call float @llvm.maxnum.f32(float %x, float %y) readnone
  ret float %z
}

define double @test_intrinsic_fmax(double %x, double %y) {
; SSE-LABEL: test_intrinsic_fmax:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    cmpunordsd %xmm0, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm3
; SSE-NEXT:    andpd %xmm1, %xmm3
; SSE-NEXT:    maxsd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm1, %xmm2
; SSE-NEXT:    orpd %xmm3, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmax:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxsd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call double @llvm.maxnum.f64(double %x, double %y) readnone
  ret double %z
}

define x86_fp80 @test_intrinsic_fmaxl(x86_fp80 %x, x86_fp80 %y) {
; CHECK-LABEL: test_intrinsic_fmaxl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt (%rsp)
; CHECK-NEXT:    callq fmaxl
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}

define <2 x float> @test_intrinsic_fmax_v2f32(<2 x float> %x, <2 x float> %y) {
; SSE-LABEL: test_intrinsic_fmax_v2f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    maxps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm0, %xmm0
; SSE-NEXT:    andps %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmax_v2f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxps %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
  ret <2 x float> %z
}
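
; <2 x float> is widened to the full 128-bit register by type legalization,
; so the code above is identical to the v4f32 test below.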

define <4 x float> @test_intrinsic_fmax_v4f32(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: test_intrinsic_fmax_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    maxps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm0, %xmm0
; SSE-NEXT:    andps %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmax_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxps %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
  ret <4 x float> %z
}

define <2 x double> @test_intrinsic_fmax_v2f64(<2 x double> %x, <2 x double> %y) {
; SSE-LABEL: test_intrinsic_fmax_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    maxpd %xmm0, %xmm2
; SSE-NEXT:    cmpunordpd %xmm0, %xmm0
; SSE-NEXT:    andpd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm2, %xmm0
; SSE-NEXT:    orpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmax_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxpd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y) readnone
  ret <2 x double> %z
}

define <4 x double> @test_intrinsic_fmax_v4f64(<4 x double> %x, <4 x double> %y) {
; SSE-LABEL: test_intrinsic_fmax_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    maxpd %xmm0, %xmm4
; SSE-NEXT:    cmpunordpd %xmm0, %xmm0
; SSE-NEXT:    andpd %xmm0, %xmm2
; SSE-NEXT:    andnpd %xmm4, %xmm0
; SSE-NEXT:    orpd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    maxpd %xmm1, %xmm2
; SSE-NEXT:    cmpunordpd %xmm1, %xmm1
; SSE-NEXT:    andpd %xmm1, %xmm3
; SSE-NEXT:    andnpd %xmm2, %xmm1
; SSE-NEXT:    orpd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmax_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxpd %ymm0, %ymm1, %ymm2
; AVX-NEXT:    vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
; AVX-NEXT:    retq
  %z = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y) readnone
  ret <4 x double> %z
}

define <8 x double> @test_intrinsic_fmax_v8f64(<8 x double> %x, <8 x double> %y) {
; SSE-LABEL: test_intrinsic_fmax_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm4, %xmm8
; SSE-NEXT:    maxpd %xmm0, %xmm8
; SSE-NEXT:    cmpunordpd %xmm0, %xmm0
; SSE-NEXT:    andpd %xmm0, %xmm4
; SSE-NEXT:    andnpd %xmm8, %xmm0
; SSE-NEXT:    orpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm5, %xmm4
; SSE-NEXT:    maxpd %xmm1, %xmm4
; SSE-NEXT:    cmpunordpd %xmm1, %xmm1
; SSE-NEXT:    andpd %xmm1, %xmm5
; SSE-NEXT:    andnpd %xmm4, %xmm1
; SSE-NEXT:    orpd %xmm5, %xmm1
; SSE-NEXT:    movapd %xmm6, %xmm4
; SSE-NEXT:    maxpd %xmm2, %xmm4
; SSE-NEXT:    cmpunordpd %xmm2, %xmm2
; SSE-NEXT:    andpd %xmm2, %xmm6
; SSE-NEXT:    andnpd %xmm4, %xmm2
; SSE-NEXT:    orpd %xmm6, %xmm2
; SSE-NEXT:    movapd %xmm7, %xmm4
; SSE-NEXT:    maxpd %xmm3, %xmm4
; SSE-NEXT:    cmpunordpd %xmm3, %xmm3
; SSE-NEXT:    andpd %xmm3, %xmm7
; SSE-NEXT:    andnpd %xmm4, %xmm3
; SSE-NEXT:    orpd %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmax_v8f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxpd %ymm0, %ymm2, %ymm4
; AVX-NEXT:    vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vblendvpd %ymm0, %ymm2, %ymm4, %ymm0
; AVX-NEXT:    vmaxpd %ymm1, %ymm3, %ymm2
; AVX-NEXT:    vcmpunordpd %ymm1, %ymm1, %ymm1
; AVX-NEXT:    vblendvpd %ymm1, %ymm3, %ymm2, %ymm1
; AVX-NEXT:    retq
  %z = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %x, <8 x double> %y) readnone
  ret <8 x double> %z
}
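
; With no AVX-512 in the RUN lines, the <8 x double> op above is split by
; type legalization into two 256-bit ops under AVX and four 128-bit ops
; under SSE, each using the same max+blend pattern.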

; The IR-level fast-math flags (FMF) propagate to the DAG node. With nnan,
; there's no need to blend: a plain max instruction suffices.

define double @maxnum_intrinsic_nnan_fmf_f64(double %a, double %b) {
; SSE-LABEL: maxnum_intrinsic_nnan_fmf_f64:
; SSE:       # %bb.0:
; SSE-NEXT:    maxsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_fmf_f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call nnan double @llvm.maxnum.f64(double %a, double %b)
  ret double %r
}
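
; Note that with nnan the NaN behavior of maxsd is irrelevant, so the
; operands no longer need to be commuted and %a stays in the destination.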

; Make sure vectors work too.

define <4 x float> @maxnum_intrinsic_nnan_fmf_v4f32(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: maxnum_intrinsic_nnan_fmf_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    maxps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_fmf_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
  ret <4 x float> %r
}

; Current (but legacy someday) behavior: a function-level attribute should
; also enable the fold.

define float @maxnum_intrinsic_nnan_attr_f32(float %a, float %b) #0 {
; SSE-LABEL: maxnum_intrinsic_nnan_attr_f32:
; SSE:       # %bb.0:
; SSE-NEXT:    maxss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_attr_f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call float @llvm.maxnum.f32(float %a, float %b)
  ret float %r
}

; Make sure vectors work too.

define <2 x double> @maxnum_intrinsic_nnan_attr_f64(<2 x double> %a, <2 x double> %b) #0 {
; SSE-LABEL: maxnum_intrinsic_nnan_attr_f64:
; SSE:       # %bb.0:
; SSE-NEXT:    maxpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: maxnum_intrinsic_nnan_attr_f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b)
  ret <2 x double> %r
}

attributes #0 = { "no-nans-fp-math"="true" }
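
; "no-nans-fp-math"="true" applies function-wide; the backend treats it
; much like an nnan flag on every FP operation, which enables the same
; bare max lowering as the explicit FMF tests above.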