• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=CMOV
2; RUN: llc < %s -asm-verbose=false -mtriple=i686-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=NOCMOV
3
; Module data layout used by every function in this test.
; NOTE(review): per the LangRef data-layout grammar, "m:o" selects Mach-O
; name mangling, while both RUN lines use linux triples - confirm that this
; mismatch is intentional.
4target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
5
6; Test 2xCMOV patterns exposed after legalization.
7; One way to do that is with (select (fcmp une/oeq)), which gets
8; legalized to setp/setne.
9
10; CHECK-LABEL: test_select_fcmp_oeq_i32:
11
12; CMOV-NEXT: ucomiss  %xmm1, %xmm0
13; CMOV-NEXT: cmovnel  %esi, %edi
14; CMOV-NEXT: cmovpl  %esi, %edi
15; CMOV-NEXT: movl  %edi, %eax
16; CMOV-NEXT: retq
17
18; NOCMOV-NEXT:  flds  8(%esp)
19; NOCMOV-NEXT:  flds  4(%esp)
20; NOCMOV-NEXT:  fucompp
21; NOCMOV-NEXT:  fnstsw  %ax
22; NOCMOV-NEXT:  sahf
23; NOCMOV-NEXT:  leal  16(%esp), %eax
24; NOCMOV-NEXT:  jne  [[TBB:.LBB[0-9_]+]]
25; NOCMOV-NEXT:  jp  [[TBB]]
26; NOCMOV-NEXT:  leal  12(%esp), %eax
27; NOCMOV-NEXT:[[TBB]]:
28; NOCMOV-NEXT:  movl  (%eax), %eax
29; NOCMOV-NEXT:  retl
; Returns %c when %a and %b compare ordered-equal, otherwise %d.
; The expectations above show the single IR compare becoming a pair of
; conditions (not-equal and parity) on both targets.
30define i32 @test_select_fcmp_oeq_i32(float %a, float %b, i32 %c, i32 %d) #0 {
31entry:
  ; oeq is true only when neither operand is NaN and the operands are equal.
32  %cmp = fcmp oeq float %a, %b
33  %r = select i1 %cmp, i32 %c, i32 %d
34  ret i32 %r
35}
36
37; CHECK-LABEL: test_select_fcmp_oeq_i64:
38
39; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
40; CMOV-NEXT:   cmovneq  %rsi, %rdi
41; CMOV-NEXT:   cmovpq  %rsi, %rdi
42; CMOV-NEXT:   movq  %rdi, %rax
43; CMOV-NEXT:   retq
44
45; NOCMOV-NEXT:   flds  8(%esp)
46; NOCMOV-NEXT:   flds  4(%esp)
47; NOCMOV-NEXT:   fucompp
48; NOCMOV-NEXT:   fnstsw  %ax
49; NOCMOV-NEXT:   sahf
50; NOCMOV-NEXT:   leal  20(%esp), %ecx
51; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
52; NOCMOV-NEXT:   jp  [[TBB]]
53; NOCMOV-NEXT:   leal  12(%esp), %ecx
54; NOCMOV-NEXT: [[TBB]]:
55; NOCMOV-NEXT:   movl  (%ecx), %eax
56; NOCMOV-NEXT:   movl  4(%ecx), %edx
57; NOCMOV-NEXT:   retl
; 64-bit integer variant: returns %c when %a and %b compare ordered-equal,
; otherwise %d. On i686 the expectations above load the chosen i64 as two
; 32-bit halves into %eax and %edx.
58define i64 @test_select_fcmp_oeq_i64(float %a, float %b, i64 %c, i64 %d) #0 {
59entry:
  ; oeq is true only when neither operand is NaN and the operands are equal.
60  %cmp = fcmp oeq float %a, %b
61  %r = select i1 %cmp, i64 %c, i64 %d
62  ret i64 %r
63}
64
65; CHECK-LABEL: test_select_fcmp_une_i64:
66
67; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
68; CMOV-NEXT:   cmovneq  %rdi, %rsi
69; CMOV-NEXT:   cmovpq  %rdi, %rsi
70; CMOV-NEXT:   movq  %rsi, %rax
71; CMOV-NEXT:   retq
72
73; NOCMOV-NEXT:   flds  8(%esp)
74; NOCMOV-NEXT:   flds  4(%esp)
75; NOCMOV-NEXT:   fucompp
76; NOCMOV-NEXT:   fnstsw  %ax
77; NOCMOV-NEXT:   sahf
78; NOCMOV-NEXT:   leal  12(%esp), %ecx
79; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
80; NOCMOV-NEXT:   jp  [[TBB]]
81; NOCMOV-NEXT:   leal  20(%esp), %ecx
82; NOCMOV-NEXT: [[TBB]]:
83; NOCMOV-NEXT:   movl  (%ecx), %eax
84; NOCMOV-NEXT:   movl  4(%ecx), %edx
85; NOCMOV-NEXT:   retl
; Inverse predicate of the oeq tests: returns %c when %a and %b compare
; unordered-or-not-equal, otherwise %d. The x86-64 expectations above use the
; same two condition codes as the oeq i64 test with the register operands
; swapped.
86define i64 @test_select_fcmp_une_i64(float %a, float %b, i64 %c, i64 %d) #0 {
87entry:
  ; une is true when either operand is NaN or the operands differ.
88  %cmp = fcmp une float %a, %b
89  %r = select i1 %cmp, i64 %c, i64 %d
90  ret i64 %r
91}
92
93; CHECK-LABEL: test_select_fcmp_oeq_f64:
94
95; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
96; CMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
97; CMOV-NEXT:   jp  [[TBB]]
98; CMOV-NEXT:   movaps  %xmm2, %xmm3
99; CMOV-NEXT: [[TBB]]:
100; CMOV-NEXT:   movaps  %xmm3, %xmm0
101; CMOV-NEXT:   retq
102
103; NOCMOV-NEXT:   flds  8(%esp)
104; NOCMOV-NEXT:   flds  4(%esp)
105; NOCMOV-NEXT:   fucompp
106; NOCMOV-NEXT:   fnstsw  %ax
107; NOCMOV-NEXT:   sahf
108; NOCMOV-NEXT:   leal  20(%esp), %eax
109; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
110; NOCMOV-NEXT:   jp  [[TBB]]
111; NOCMOV-NEXT:   leal  12(%esp), %eax
112; NOCMOV-NEXT: [[TBB]]:
113; NOCMOV-NEXT:   fldl  (%eax)
114; NOCMOV-NEXT:   retl
; Floating-point result variant: returns double %c when the float operands
; %a and %b compare ordered-equal, otherwise %d. The x86-64 expectations
; above use a jne/jp branch pair around a movaps instead of conditional-move
; instructions.
115define double @test_select_fcmp_oeq_f64(float %a, float %b, double %c, double %d) #0 {
116entry:
  ; oeq is true only when neither operand is NaN and the operands are equal.
117  %cmp = fcmp oeq float %a, %b
118  %r = select i1 %cmp, double %c, double %d
119  ret double %r
120}
121
122; CHECK-LABEL: test_select_fcmp_oeq_v4i32:
123
124; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
125; CMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
126; CMOV-NEXT:   jp  [[TBB]]
127; CMOV-NEXT:   movaps  %xmm2, %xmm3
128; CMOV-NEXT: [[TBB]]:
129; CMOV-NEXT:   movaps  %xmm3, %xmm0
130; CMOV-NEXT:   retq
131
132; NOCMOV-NEXT:   pushl  %edi
133; NOCMOV-NEXT:   pushl  %esi
134; NOCMOV-NEXT:   flds  20(%esp)
135; NOCMOV-NEXT:   flds  16(%esp)
136; NOCMOV-NEXT:   fucompp
137; NOCMOV-NEXT:   fnstsw  %ax
138; NOCMOV-NEXT:   sahf
139; NOCMOV-NEXT:   leal  40(%esp), %eax
140; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
141; NOCMOV-NEXT:   jp  [[TBB]]
142; NOCMOV-NEXT:   leal  24(%esp), %eax
143; NOCMOV-NEXT: [[TBB]]:
144; NOCMOV-NEXT:   movl  (%eax), %ecx
145; NOCMOV-NEXT:   leal  44(%esp), %edx
146; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
147; NOCMOV-NEXT:   jp  [[TBB]]
148; NOCMOV-NEXT:   leal  28(%esp), %edx
149; NOCMOV-NEXT: [[TBB]]:
150; NOCMOV-NEXT:   movl  12(%esp), %eax
151; NOCMOV-NEXT:   movl  (%edx), %edx
152; NOCMOV-NEXT:   leal  48(%esp), %esi
153; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
154; NOCMOV-NEXT:   jp  [[TBB]]
155; NOCMOV-NEXT:   leal  32(%esp), %esi
156; NOCMOV-NEXT: [[TBB]]:
157; NOCMOV-NEXT:   movl  (%esi), %esi
158; NOCMOV-NEXT:   leal  52(%esp), %edi
159; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
160; NOCMOV-NEXT:   jp  [[TBB]]
161; NOCMOV-NEXT:   leal  36(%esp), %edi
162; NOCMOV-NEXT: [[TBB]]:
163; NOCMOV-NEXT:   movl  (%edi), %edi
164; NOCMOV-NEXT:   movl  %edi, 12(%eax)
165; NOCMOV-NEXT:   movl  %esi, 8(%eax)
166; NOCMOV-NEXT:   movl  %edx, 4(%eax)
167; NOCMOV-NEXT:   movl  %ecx, (%eax)
168; NOCMOV-NEXT:   popl  %esi
169; NOCMOV-NEXT:   popl  %edi
170; NOCMOV-NEXT:   retl  $4
; Vector result variant: a single scalar i1 condition picks the whole
; <4 x i32> value %c (ordered-equal) or %d (otherwise). The i686 expectations
; above repeat the jne/jp pair once for each of the four 32-bit elements.
171define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <4 x i32> %d) #0 {
172entry:
  ; oeq is true only when neither operand is NaN and the operands are equal.
173  %cmp = fcmp oeq float %a, %b
  ; The condition is a scalar i1, so all four lanes come from the same operand.
174  %r = select i1 %cmp, <4 x i32> %c, <4 x i32> %d
175  ret <4 x i32> %r
176}
177
178; Also make sure we catch the original code-sequence of interest:
179
180; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
181; CMOV-NEXT:   .long  1065353216
182
183; CHECK-LABEL: test_zext_fcmp_une:
184; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
185; CMOV-NEXT:   movss  [[ONE_F32_LCPI]](%rip), %xmm0
186; CMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
187; CMOV-NEXT:   jp  [[TBB]]
188; CMOV-NEXT:   xorps  %xmm0, %xmm0
189; CMOV-NEXT: [[TBB]]:
190; CMOV-NEXT:   retq
191
192; NOCMOV:        jne
193; NOCMOV-NEXT:   jp
; Converts the result of an unordered-or-not-equal compare to a float:
; 1.0 when the compare is true, 0.0 when it is false. The constant-pool word
; 1065353216 expected above is the IEEE-754 single-precision bit pattern of
; 1.0.
194define float @test_zext_fcmp_une(float %a, float %b) #0 {
195entry:
  ; une is true when either operand is NaN or the operands differ.
196  %cmp = fcmp une float %a, %b
  ; Zero-extending the i1 gives the i32 value 0 or 1.
197  %conv1 = zext i1 %cmp to i32
198  %conv2 = sitofp i32 %conv1 to float
199  ret float %conv2
200}
201
202; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
203; CMOV-NEXT:   .long  1065353216
204
205; CHECK-LABEL: test_zext_fcmp_oeq:
206; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
207; CMOV-NEXT:   xorps  %xmm0, %xmm0
208; CMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
209; CMOV-NEXT:   jp  [[TBB]]
210; CMOV-NEXT:   movss  [[ONE_F32_LCPI]](%rip), %xmm0
211; CMOV-NEXT: [[TBB]]:
212; CMOV-NEXT:   retq
213
214; NOCMOV:        jne
215; NOCMOV-NEXT:   jp
; Converts the result of an ordered-equal compare to a float: 1.0 when the
; compare is true, 0.0 when it is false. Compared with the une variant, the
; x86-64 expectations above swap the positions of the xorps (zero) and the
; movss (load of 1.0).
216define float @test_zext_fcmp_oeq(float %a, float %b) #0 {
217entry:
  ; oeq is true only when neither operand is NaN and the operands are equal.
218  %cmp = fcmp oeq float %a, %b
  ; Zero-extending the i1 gives the i32 value 0 or 1.
219  %conv1 = zext i1 %cmp to i32
220  %conv2 = sitofp i32 %conv1 to float
221  ret float %conv2
222}
223
; Attribute group referenced by the fcmp-based test functions in this file;
; no_cascade_opt does not reference it.
224attributes #0 = { nounwind }
225
; Zero-initialized i8 global; no_cascade_opt writes its result here with a
; volatile store.
226@g8 = global i8 0
227
228; The following test failed because llvm had a bug where a structure like:
229;
230; %12 = CMOV_GR8 %7, %11 ... (lt)
231; %13 = CMOV_GR8 %12, %11 ... (gt)
232;
233; was lowered incorrectly.
234;
235; The first two cmovs got expanded to:
236; %bb.0:
237;   JL_1 %bb.9
238; %bb.7:
239;   JG_1 %bb.9
240; %bb.8:
241; %bb.9:
242;   %12 = phi(%7, %bb.8, %11, %bb.0, %12, %bb.7)
243;   %13 = COPY %12
244; Which was invalid as %12 is not the same value as %13
245
246; CHECK-LABEL: no_cascade_opt:
247; CMOV-DAG: cmpl %edx, %esi
248; CMOV-DAG: movb $20, %al
249; CMOV-DAG: movb $20, %dl
250; CMOV:   jge [[BB2:.LBB[0-9_]+]]
251; CMOV:   jle [[BB3:.LBB[0-9_]+]]
252; CMOV: [[BB0:.LBB[0-9_]+]]
253; CMOV:   testl %edi, %edi
254; CMOV:   jne [[BB4:.LBB[0-9_]+]]
255; CMOV: [[BB1:.LBB[0-9_]+]]
256; CMOV:   movb %al, g8(%rip)
257; CMOV:   retq
258; CMOV: [[BB2]]:
259; CMOV:   movl %ecx, %edx
260; CMOV:   jg [[BB0]]
261; CMOV: [[BB3]]:
262; CMOV:   movl %edx, %eax
263; CMOV:   testl %edi, %edi
264; CMOV:   je [[BB1]]
265; CMOV: [[BB4]]:
266; CMOV:   movl %edx, %eax
267; CMOV:   movb %al, g8(%rip)
268; CMOV:   retq
; Regression input for the cascaded 8-bit select lowering problem described
; in the comment block earlier in this file. Two selects share the true value
; 20 and are chained, and a third select consumes both of their results, so
; the two chained results must stay distinct values.
269define void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) {
270entry:
271  %c0 = icmp eq i32 %v0, 0
  ; %c1 and %c2 test the same operand pair with opposite strict signed orderings.
272  %c1 = icmp slt i32 %v1, %v2
273  %c2 = icmp sgt i32 %v1, %v2
274  %trunc = trunc i32 %v3 to i8
  ; First select: 20 when %v1 < %v2, otherwise the low byte of %v3.
275  %sel0 = select i1 %c1, i8 20, i8 %trunc
  ; Second select cascades on the first: 20 when %v1 > %v2, otherwise %sel0.
276  %sel1 = select i1 %c2, i8 20, i8 %sel0
  ; Third select reads both earlier results, keeping %sel0 and %sel1 live separately.
277  %sel2 = select i1 %c0, i8 %sel1, i8 %sel0
  ; Volatile store of the final value to @g8.
278  store volatile i8 %sel2, i8* @g8
279  ret void
280}
281