; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2  < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx  < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

declare float @fminf(float, float)
declare double @fmin(double, double)
declare x86_fp80 @fminl(x86_fp80, x86_fp80)
declare float @llvm.minnum.f32(float, float)
declare double @llvm.minnum.f64(double, double)
declare x86_fp80 @llvm.minnum.f80(x86_fp80, x86_fp80)

declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>)

; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.

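; The x86 min instructions don't implement IEEE-754 minNum NaN semantics: if an
; input is NaN they return a fixed operand rather than the non-NaN one. The
; expansions below therefore pair the min with a cmpunord compare and a blend
; (and/andn/or on SSE, vblendv on AVX) to select the non-NaN input.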
define float @test_fminf(float %x, float %y) {
; SSE-LABEL: test_fminf:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    cmpunordss %xmm0, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andps %xmm1, %xmm3
; SSE-NEXT:    minss %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm3, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_fminf:
; AVX:       # %bb.0:
; AVX-NEXT:    vminss %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call float @fminf(float %x, float %y) readnone
  ret float %z
}

; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.

define double @test_fmin(double %x, double %y) {
; SSE-LABEL: test_fmin:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    cmpunordsd %xmm0, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm3
; SSE-NEXT:    andpd %xmm1, %xmm3
; SSE-NEXT:    minsd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm1, %xmm2
; SSE-NEXT:    orpd %xmm3, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_fmin:
; AVX:       # %bb.0:
; AVX-NEXT:    vminsd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call double @fmin(double %x, double %y) readnone
  ret double %z
}

define x86_fp80 @test_fminl(x86_fp80 %x, x86_fp80 %y) {
; CHECK-LABEL: test_fminl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt (%rsp)
; CHECK-NEXT:    callq fminl
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %z = call x86_fp80 @fminl(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}

define float @test_intrinsic_fminf(float %x, float %y) {
; SSE-LABEL: test_intrinsic_fminf:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    cmpunordss %xmm0, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andps %xmm1, %xmm3
; SSE-NEXT:    minss %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm3, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fminf:
; AVX:       # %bb.0:
; AVX-NEXT:    vminss %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call float @llvm.minnum.f32(float %x, float %y) readnone
  ret float %z
}

define double @test_intrinsic_fmin(double %x, double %y) {
; SSE-LABEL: test_intrinsic_fmin:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    cmpunordsd %xmm0, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm3
; SSE-NEXT:    andpd %xmm1, %xmm3
; SSE-NEXT:    minsd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm1, %xmm2
; SSE-NEXT:    orpd %xmm3, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmin:
; AVX:       # %bb.0:
; AVX-NEXT:    vminsd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call double @llvm.minnum.f64(double %x, double %y) readnone
  ret double %z
}

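; As with the libm call above, there is no native min instruction for x86_fp80,
; so llvm.minnum.f80 is lowered to a call to fminl.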
define x86_fp80 @test_intrinsic_fminl(x86_fp80 %x, x86_fp80 %y) {
; CHECK-LABEL: test_intrinsic_fminl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fstpt (%rsp)
; CHECK-NEXT:    callq fminl
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %z = call x86_fp80 @llvm.minnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}

define <2 x float> @test_intrinsic_fmin_v2f32(<2 x float> %x, <2 x float> %y) {
; SSE-LABEL: test_intrinsic_fmin_v2f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    minps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm0, %xmm0
; SSE-NEXT:    andps %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmin_v2f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminps %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
  ret <2 x float> %z
}

define <4 x float> @test_intrinsic_fmin_v4f32(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: test_intrinsic_fmin_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    minps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm0, %xmm0
; SSE-NEXT:    andps %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmin_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminps %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
  ret <4 x float> %z
}

define <2 x double> @test_intrinsic_fmin_v2f64(<2 x double> %x, <2 x double> %y) {
; SSE-LABEL: test_intrinsic_fmin_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    minpd %xmm0, %xmm2
; SSE-NEXT:    cmpunordpd %xmm0, %xmm0
; SSE-NEXT:    andpd %xmm0, %xmm1
; SSE-NEXT:    andnpd %xmm2, %xmm0
; SSE-NEXT:    orpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmin_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %z = call <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y) readnone
  ret <2 x double> %z
}

define <4 x double> @test_intrinsic_fmin_v4f64(<4 x double> %x, <4 x double> %y) {
; SSE-LABEL: test_intrinsic_fmin_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    minpd %xmm0, %xmm4
; SSE-NEXT:    cmpunordpd %xmm0, %xmm0
; SSE-NEXT:    andpd %xmm0, %xmm2
; SSE-NEXT:    andnpd %xmm4, %xmm0
; SSE-NEXT:    orpd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    minpd %xmm1, %xmm2
; SSE-NEXT:    cmpunordpd %xmm1, %xmm1
; SSE-NEXT:    andpd %xmm1, %xmm3
; SSE-NEXT:    andnpd %xmm2, %xmm1
; SSE-NEXT:    orpd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmin_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm0, %ymm1, %ymm2
; AVX-NEXT:    vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
; AVX-NEXT:    retq
  %z = call <4 x double> @llvm.minnum.v4f64(<4 x double> %x, <4 x double> %y) readnone
  ret <4 x double> %z
}

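; v8f64 is not a legal type here, so the operation is split: two 256-bit ops
; with AVX and four 128-bit ops with SSE.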
define <8 x double> @test_intrinsic_fmin_v8f64(<8 x double> %x, <8 x double> %y) {
; SSE-LABEL: test_intrinsic_fmin_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm4, %xmm8
; SSE-NEXT:    minpd %xmm0, %xmm8
; SSE-NEXT:    cmpunordpd %xmm0, %xmm0
; SSE-NEXT:    andpd %xmm0, %xmm4
; SSE-NEXT:    andnpd %xmm8, %xmm0
; SSE-NEXT:    orpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm5, %xmm4
; SSE-NEXT:    minpd %xmm1, %xmm4
; SSE-NEXT:    cmpunordpd %xmm1, %xmm1
; SSE-NEXT:    andpd %xmm1, %xmm5
; SSE-NEXT:    andnpd %xmm4, %xmm1
; SSE-NEXT:    orpd %xmm5, %xmm1
; SSE-NEXT:    movapd %xmm6, %xmm4
; SSE-NEXT:    minpd %xmm2, %xmm4
; SSE-NEXT:    cmpunordpd %xmm2, %xmm2
; SSE-NEXT:    andpd %xmm2, %xmm6
; SSE-NEXT:    andnpd %xmm4, %xmm2
; SSE-NEXT:    orpd %xmm6, %xmm2
; SSE-NEXT:    movapd %xmm7, %xmm4
; SSE-NEXT:    minpd %xmm3, %xmm4
; SSE-NEXT:    cmpunordpd %xmm3, %xmm3
; SSE-NEXT:    andpd %xmm3, %xmm7
; SSE-NEXT:    andnpd %xmm4, %xmm3
; SSE-NEXT:    orpd %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: test_intrinsic_fmin_v8f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm0, %ymm2, %ymm4
; AVX-NEXT:    vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vblendvpd %ymm0, %ymm2, %ymm4, %ymm0
; AVX-NEXT:    vminpd %ymm1, %ymm3, %ymm2
; AVX-NEXT:    vcmpunordpd %ymm1, %ymm1, %ymm1
; AVX-NEXT:    vblendvpd %ymm1, %ymm3, %ymm2, %ymm1
; AVX-NEXT:    retq
  %z = call <8 x double> @llvm.minnum.v8f64(<8 x double> %x, <8 x double> %y) readnone
  ret <8 x double> %z
}

; The IR-level FMF propagate to the node. With nnan, there's no need to blend.

define float @minnum_intrinsic_nnan_fmf_f32(float %a, float %b) {
; SSE-LABEL: minnum_intrinsic_nnan_fmf_f32:
; SSE:       # %bb.0:
; SSE-NEXT:    minss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minnum_intrinsic_nnan_fmf_f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call nnan float @llvm.minnum.f32(float %a, float %b)
  ret float %r
}

; Make sure vectors work too.

define <2 x double> @minnum_intrinsic_nnan_fmf_v2f64(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: minnum_intrinsic_nnan_fmf_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minnum_intrinsic_nnan_fmf_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b)
  ret <2 x double> %r
}

; Current (but legacy someday): a function-level attribute should also enable the fold.

define double @minnum_intrinsic_nnan_attr_f64(double %a, double %b) #0 {
; SSE-LABEL: minnum_intrinsic_nnan_attr_f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minnum_intrinsic_nnan_attr_f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call double @llvm.minnum.f64(double %a, double %b)
  ret double %r
}

; Make sure vectors work too.

define <4 x float> @minnum_intrinsic_nnan_attr_v4f32(<4 x float> %a, <4 x float> %b) #0 {
; SSE-LABEL: minnum_intrinsic_nnan_attr_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    minps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minnum_intrinsic_nnan_attr_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
  ret <4 x float> %r
}

attributes #0 = { "no-nans-fp-math"="true" }