; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

declare float @fmaxf(float, float)
declare double @fmax(double, double)
declare x86_fp80 @fmaxl(x86_fp80, x86_fp80)
declare float @llvm.maxnum.f32(float, float)
declare double @llvm.maxnum.f64(double, double)
declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80)

declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)

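; Note on the expansions below: llvm.maxnum (and the fmax libcall) must
; return the non-NaN operand when exactly one input is NaN, but x86
; max[ss|sd] returns its source operand whenever the inputs are unordered.
; The inline expansion therefore pairs maxss with a cmpunordss mask on %x
; and a blend that re-selects %y when %x is NaN; maxss alone already yields
; %x when only %y is NaN.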
; CHECK-LABEL: @test_fmaxf
; SSE:         movaps %xmm0, %xmm2
; SSE-NEXT:    cmpunordss %xmm2, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andps %xmm1, %xmm3
; SSE-NEXT:    maxss %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm3, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX:         vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
define float @test_fmaxf(float %x, float %y) {
  %z = call float @fmaxf(float %x, float %y) readnone
  ret float %z
}

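; At minsize the multi-instruction expansion is not worth the bytes, so the
; call is left as a bare tail call to the libcall.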
; CHECK-LABEL: @test_fmaxf_minsize
; CHECK:       jmp fmaxf
define float @test_fmaxf_minsize(float %x, float %y) minsize {
  %z = call float @fmaxf(float %x, float %y) readnone
  ret float %z
}

; FIXME: Doubles should be inlined similarly to floats.
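; A plausible f64 expansion would mirror the f32 sequence above with the
; double-precision forms of the same instructions (hypothetical SSE sketch,
; not checked output):
;   movapd      %xmm0, %xmm2
;   cmpunordsd  %xmm2, %xmm2    ; all-ones when %x is NaN
;   movapd      %xmm2, %xmm3
;   andpd       %xmm1, %xmm3    ; keep %y where %x is NaN
;   maxsd       %xmm0, %xmm1    ; maxsd yields %x when the inputs are unordered
;   andnpd      %xmm1, %xmm2    ; keep the max result elsewhere
;   orpd        %xmm3, %xmm2
;   movapd      %xmm2, %xmm0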

; CHECK-LABEL: @test_fmax
; CHECK: jmp fmax
define double @test_fmax(double %x, double %y) {
  %z = call double @fmax(double %x, double %y) readnone
  ret double %z
}

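; x86_fp80 lives on the x87 stack and has no SSE/AVX max instruction, so the
; 80-bit case is expected to remain a call to the fmaxl libcall.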
; CHECK-LABEL: @test_fmaxl
; CHECK: callq fmaxl
define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) {
  %z = call x86_fp80 @fmaxl(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}

; CHECK-LABEL: @test_intrinsic_fmaxf
; SSE:         movaps %xmm0, %xmm2
; SSE-NEXT:    cmpunordss %xmm2, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andps %xmm1, %xmm3
; SSE-NEXT:    maxss %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm3, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX:         vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
define float @test_intrinsic_fmaxf(float %x, float %y) {
  %z = call float @llvm.maxnum.f32(float %x, float %y) readnone
  ret float %z
}

; FIXME: Doubles should be inlined similarly to floats.
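; (The hypothetical maxsd/cmpunordsd sketch above @test_fmax would apply
; here as well.)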

; CHECK-LABEL: @test_intrinsic_fmax
; CHECK: jmp fmax
define double @test_intrinsic_fmax(double %x, double %y) {
  %z = call double @llvm.maxnum.f64(double %x, double %y) readnone
  ret double %z
}

; CHECK-LABEL: @test_intrinsic_fmaxl
; CHECK: callq fmaxl
define x86_fp80 @test_intrinsic_fmaxl(x86_fp80 %x, x86_fp80 %y) {
  %z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}

; FIXME: This should not be doing 4 scalar ops on a 2-element vector.
; FIXME: This should use vector ops (maxps / cmpps).
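; A lane-wise expansion along the lines the FIXMEs suggest (hypothetical SSE
; sketch, not checked output):
;   movaps      %xmm0, %xmm2
;   cmpunordps  %xmm2, %xmm2    ; per-lane mask: %x is NaN
;   movaps      %xmm2, %xmm3
;   andps       %xmm1, %xmm3    ; NaN lanes take %y
;   maxps       %xmm0, %xmm1    ; lane-wise max; unordered lanes yield %x
;   andnps      %xmm1, %xmm2    ; ordered lanes take the max
;   orps        %xmm3, %xmm2
;   movaps      %xmm2, %xmm0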

; CHECK-LABEL: @test_intrinsic_fmax_v2f32
; SSE:         movaps %xmm1, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    cmpunordss %xmm4, %xmm4
; SSE-NEXT:    movaps %xmm4, %xmm5
; SSE-NEXT:    andps %xmm2, %xmm5
; SSE-NEXT:    maxss %xmm3, %xmm2
; SSE-NEXT:    andnps %xmm2, %xmm4
; SSE-NEXT:    orps %xmm5, %xmm4
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-NEXT:    movaps %xmm0, %xmm5
; SSE-NEXT:    shufps {{.*#+}} xmm5 = xmm5[1,1,2,3]
; SSE-NEXT:    movaps %xmm5, %xmm3
; SSE-NEXT:    cmpunordss %xmm3, %xmm3
; SSE-NEXT:    movaps %xmm3, %xmm6
; SSE-NEXT:    andps %xmm2, %xmm6
; SSE-NEXT:    maxss %xmm5, %xmm2
; SSE-NEXT:    andnps %xmm2, %xmm3
; SSE-NEXT:    orps %xmm6, %xmm3
; SSE-NEXT:    unpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    cmpunordss %xmm2, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm4
; SSE-NEXT:    andps %xmm1, %xmm4
; SSE-NEXT:    movaps %xmm1, %xmm5
; SSE-NEXT:    maxss %xmm0, %xmm5
; SSE-NEXT:    andnps %xmm5, %xmm2
; SSE-NEXT:    orps %xmm4, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    cmpunordss %xmm4, %xmm4
; SSE-NEXT:    movaps %xmm4, %xmm5
; SSE-NEXT:    andps %xmm1, %xmm5
; SSE-NEXT:    maxss %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm1, %xmm4
; SSE-NEXT:    orps %xmm5, %xmm4
; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX:         vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm3
; AVX-NEXT:    vblendvps %xmm3, %xmm1, %xmm2, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; AVX-NEXT:    vmaxss %xmm3, %xmm4, %xmm5
; AVX-NEXT:    vcmpunordss %xmm3, %xmm3, %xmm3
; AVX-NEXT:    vblendvps %xmm3, %xmm4, %xmm5, %xmm3
; AVX-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
; AVX-NEXT:    vmaxss %xmm3, %xmm4, %xmm5
; AVX-NEXT:    vcmpunordss %xmm3, %xmm3, %xmm3
; AVX-NEXT:    vblendvps %xmm3, %xmm4, %xmm5, %xmm3
; AVX-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT:    vmaxss %xmm0, %xmm1, %xmm3
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm3, %xmm0
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; AVX-NEXT:    retq
define <2 x float> @test_intrinsic_fmax_v2f32(<2 x float> %x, <2 x float> %y) {
  %z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
  ret <2 x float> %z
}

; FIXME: This should use vector ops (maxps / cmpps).
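; (The hypothetical maxps/cmpunordps sketch above @test_intrinsic_fmax_v2f32
; would apply here unchanged.)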

; CHECK-LABEL: @test_intrinsic_fmax_v4f32
; SSE:         movaps %xmm1, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    cmpunordss %xmm4, %xmm4
; SSE-NEXT:    movaps %xmm4, %xmm5
; SSE-NEXT:    andps %xmm2, %xmm5
; SSE-NEXT:    maxss %xmm3, %xmm2
; SSE-NEXT:    andnps %xmm2, %xmm4
; SSE-NEXT:    orps %xmm5, %xmm4
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-NEXT:    movaps %xmm0, %xmm5
; SSE-NEXT:    shufps {{.*#+}} xmm5 = xmm5[1,1,2,3]
; SSE-NEXT:    movaps %xmm5, %xmm3
; SSE-NEXT:    cmpunordss %xmm3, %xmm3
; SSE-NEXT:    movaps %xmm3, %xmm6
; SSE-NEXT:    andps %xmm2, %xmm6
; SSE-NEXT:    maxss %xmm5, %xmm2
; SSE-NEXT:    andnps %xmm2, %xmm3
; SSE-NEXT:    orps %xmm6, %xmm3
; SSE-NEXT:    unpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    cmpunordss %xmm2, %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm4
; SSE-NEXT:    andps %xmm1, %xmm4
; SSE-NEXT:    movaps %xmm1, %xmm5
; SSE-NEXT:    maxss %xmm0, %xmm5
; SSE-NEXT:    andnps %xmm5, %xmm2
; SSE-NEXT:    orps %xmm4, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    cmpunordss %xmm4, %xmm4
; SSE-NEXT:    movaps %xmm4, %xmm5
; SSE-NEXT:    andps %xmm1, %xmm5
; SSE-NEXT:    maxss %xmm0, %xmm1
; SSE-NEXT:    andnps %xmm1, %xmm4
; SSE-NEXT:    orps %xmm5, %xmm4
; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX:         vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm3
; AVX-NEXT:    vblendvps %xmm3, %xmm1, %xmm2, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; AVX-NEXT:    vmaxss %xmm3, %xmm4, %xmm5
; AVX-NEXT:    vcmpunordss %xmm3, %xmm3, %xmm3
; AVX-NEXT:    vblendvps %xmm3, %xmm4, %xmm5, %xmm3
; AVX-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
; AVX-NEXT:    vmaxss %xmm3, %xmm4, %xmm5
; AVX-NEXT:    vcmpunordss %xmm3, %xmm3, %xmm3
; AVX-NEXT:    vblendvps %xmm3, %xmm4, %xmm5, %xmm3
; AVX-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT:    vmaxss %xmm0, %xmm1, %xmm3
; AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm3, %xmm0
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; AVX-NEXT:    retq
define <4 x float> @test_intrinsic_fmax_v4f32(<4 x float> %x, <4 x float> %y) {
  %z = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
  ret <4 x float> %z
}

; FIXME: Vector of doubles should be inlined similarly to vector of floats.
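; A hypothetical packed-double expansion, analogous to the float case above
; (sketch, not checked output):
;   movapd      %xmm0, %xmm2
;   cmpunordpd  %xmm2, %xmm2    ; per-lane mask: %x is NaN
;   movapd      %xmm2, %xmm3
;   andpd       %xmm1, %xmm3    ; NaN lanes take %y
;   maxpd       %xmm0, %xmm1    ; lane-wise max; unordered lanes yield %x
;   andnpd      %xmm1, %xmm2    ; ordered lanes take the max
;   orpd        %xmm3, %xmm2
;   movapd      %xmm2, %xmm0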

; CHECK-LABEL: @test_intrinsic_fmax_v2f64
; CHECK: callq fmax
; CHECK: callq fmax
define <2 x double> @test_intrinsic_fmax_v2f64(<2 x double> %x, <2 x double> %y) {
  %z = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y) readnone
  ret <2 x double> %z
}

; FIXME: Vector of doubles should be inlined similarly to vector of floats.
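; (The maxpd/cmpunordpd sketch above would apply per 128-bit half to these
; wider types as well, including v8f64 below.)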

; CHECK-LABEL: @test_intrinsic_fmax_v4f64
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
define <4 x double> @test_intrinsic_fmax_v4f64(<4 x double> %x, <4 x double> %y) {
  %z = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y) readnone
  ret <4 x double> %z
}

; FIXME: Vector of doubles should be inlined similarly to vector of floats.

; CHECK-LABEL: @test_intrinsic_fmax_v8f64
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
; CHECK: callq fmax
define <8 x double> @test_intrinsic_fmax_v8f64(<8 x double> %x, <8 x double> %y) {
  %z = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %x, <8 x double> %y) readnone
  ret <8 x double> %z
}