; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64

; The peephole optimizer can elide some physical register copies such as
; EFLAGS. Make sure the flags are used directly, instead of needlessly
; saving and restoring specific conditions.

; Globals used by the arithmetic tests: L is incremented/decremented and its
; zero flag consumed; M supplies a second condition that is and-ed in.
@L = external global i32
@M = external global i8

; External call used to clobber EFLAGS between a cmpxchg and its branch.
declare i32 @bar(i64)

define i1 @plus_one() nounwind {
; CHECK32-LABEL: plus_one:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb M, %al
; CHECK32-NEXT:    incl L
; CHECK32-NEXT:    jne .LBB0_2
; CHECK32-NEXT:  # %bb.1: # %entry
; CHECK32-NEXT:    andb $8, %al
; CHECK32-NEXT:    je .LBB0_2
; CHECK32-NEXT:  # %bb.3: # %exit2
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:    retl
; CHECK32-NEXT:  .LBB0_2: # %exit
; CHECK32-NEXT:    movb $1, %al
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: plus_one:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movb {{.*}}(%rip), %al
; CHECK64-NEXT:    incl {{.*}}(%rip)
; CHECK64-NEXT:    jne .LBB0_2
; CHECK64-NEXT:  # %bb.1: # %entry
; CHECK64-NEXT:    andb $8, %al
; CHECK64-NEXT:    je .LBB0_2
; CHECK64-NEXT:  # %bb.3: # %exit2
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB0_2: # %exit
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
entry:
  %loaded_L = load i32, i32* @L
  %val = add nsw i32 %loaded_L, 1 ; N.B. will emit inc.
  store i32 %val, i32* @L
  %loaded_M = load i8, i8* @M
  %masked = and i8 %loaded_M, 8
  %M_is_true = icmp ne i8 %masked, 0
  %L_is_false = icmp eq i32 %val, 0
  %cond = and i1 %L_is_false, %M_is_true
  br i1 %cond, label %exit2, label %exit

exit:
  ret i1 true

exit2:
  ret i1 false
}

define i1 @plus_forty_two() nounwind {
; CHECK32-LABEL: plus_forty_two:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb M, %al
; CHECK32-NEXT:    addl $42, L
; CHECK32-NEXT:    jne .LBB1_2
; CHECK32-NEXT:  # %bb.1: # %entry
; CHECK32-NEXT:    andb $8, %al
; CHECK32-NEXT:    je .LBB1_2
; CHECK32-NEXT:  # %bb.3: # %exit2
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:    retl
; CHECK32-NEXT:  .LBB1_2: # %exit
; CHECK32-NEXT:    movb $1, %al
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: plus_forty_two:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movb {{.*}}(%rip), %al
; CHECK64-NEXT:    addl $42, {{.*}}(%rip)
; CHECK64-NEXT:    jne .LBB1_2
; CHECK64-NEXT:  # %bb.1: # %entry
; CHECK64-NEXT:    andb $8, %al
; CHECK64-NEXT:    je .LBB1_2
; CHECK64-NEXT:  # %bb.3: # %exit2
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB1_2: # %exit
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
entry:
  %loaded_L = load i32, i32* @L
  %val = add nsw i32 %loaded_L, 42 ; N.B. won't emit inc.
  store i32 %val, i32* @L
  %loaded_M = load i8, i8* @M
  %masked = and i8 %loaded_M, 8
  %M_is_true = icmp ne i8 %masked, 0
  %L_is_false = icmp eq i32 %val, 0
  %cond = and i1 %L_is_false, %M_is_true
  br i1 %cond, label %exit2, label %exit

exit:
  ret i1 true

exit2:
  ret i1 false
}

define i1 @minus_one() nounwind {
; CHECK32-LABEL: minus_one:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb M, %al
; CHECK32-NEXT:    decl L
; CHECK32-NEXT:    jne .LBB2_2
; CHECK32-NEXT:  # %bb.1: # %entry
; CHECK32-NEXT:    andb $8, %al
; CHECK32-NEXT:    je .LBB2_2
; CHECK32-NEXT:  # %bb.3: # %exit2
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:    retl
; CHECK32-NEXT:  .LBB2_2: # %exit
; CHECK32-NEXT:    movb $1, %al
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: minus_one:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movb {{.*}}(%rip), %al
; CHECK64-NEXT:    decl {{.*}}(%rip)
; CHECK64-NEXT:    jne .LBB2_2
; CHECK64-NEXT:  # %bb.1: # %entry
; CHECK64-NEXT:    andb $8, %al
; CHECK64-NEXT:    je .LBB2_2
; CHECK64-NEXT:  # %bb.3: # %exit2
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB2_2: # %exit
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
entry:
  %loaded_L = load i32, i32* @L
  %val = add nsw i32 %loaded_L, -1 ; N.B. will emit dec.
  store i32 %val, i32* @L
  %loaded_M = load i8, i8* @M
  %masked = and i8 %loaded_M, 8
  %M_is_true = icmp ne i8 %masked, 0
  %L_is_false = icmp eq i32 %val, 0
  %cond = and i1 %L_is_false, %M_is_true
  br i1 %cond, label %exit2, label %exit

exit:
  ret i1 true

exit2:
  ret i1 false
}

define i1 @minus_forty_two() nounwind {
; CHECK32-LABEL: minus_forty_two:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb M, %al
; CHECK32-NEXT:    addl $-42, L
; CHECK32-NEXT:    jne .LBB3_2
; CHECK32-NEXT:  # %bb.1: # %entry
; CHECK32-NEXT:    andb $8, %al
; CHECK32-NEXT:    je .LBB3_2
; CHECK32-NEXT:  # %bb.3: # %exit2
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:    retl
; CHECK32-NEXT:  .LBB3_2: # %exit
; CHECK32-NEXT:    movb $1, %al
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: minus_forty_two:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movb {{.*}}(%rip), %al
; CHECK64-NEXT:    addl $-42, {{.*}}(%rip)
; CHECK64-NEXT:    jne .LBB3_2
; CHECK64-NEXT:  # %bb.1: # %entry
; CHECK64-NEXT:    andb $8, %al
; CHECK64-NEXT:    je .LBB3_2
; CHECK64-NEXT:  # %bb.3: # %exit2
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB3_2: # %exit
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
entry:
  %loaded_L = load i32, i32* @L
  %val = add nsw i32 %loaded_L, -42 ; N.B. won't emit dec.
  store i32 %val, i32* @L
  %loaded_M = load i8, i8* @M
  %masked = and i8 %loaded_M, 8
  %M_is_true = icmp ne i8 %masked, 0
  %L_is_false = icmp eq i32 %val, 0
  %cond = and i1 %L_is_false, %M_is_true
  br i1 %cond, label %exit2, label %exit

exit:
  ret i1 true

exit2:
  ret i1 false
}

define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
; CHECK32-LABEL: test_intervening_call:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    pushl %ebx
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    lock cmpxchg8b (%esi)
; CHECK32-NEXT:    setne %bl
; CHECK32-NEXT:    subl $8, %esp
; CHECK32-NEXT:    pushl %edx
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    calll bar
; CHECK32-NEXT:    addl $16, %esp
; CHECK32-NEXT:    testb %bl, %bl
; CHECK32-NEXT:    jne .LBB4_3
; CHECK32-NEXT:  # %bb.1: # %t
; CHECK32-NEXT:    movl $42, %eax
; CHECK32-NEXT:    jmp .LBB4_2
; CHECK32-NEXT:  .LBB4_3: # %f
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:  .LBB4_2: # %t
; CHECK32-NEXT:    xorl %edx, %edx
; CHECK32-NEXT:    addl $4, %esp
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    popl %ebx
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: test_intervening_call:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    pushq %rbx
; CHECK64-NEXT:    movq %rsi, %rax
; CHECK64-NEXT:    lock cmpxchgq %rdx, (%rdi)
; CHECK64-NEXT:    setne %bl
; CHECK64-NEXT:    movq %rax, %rdi
; CHECK64-NEXT:    callq bar
; CHECK64-NEXT:    testb %bl, %bl
; CHECK64-NEXT:    jne .LBB4_2
; CHECK64-NEXT:  # %bb.1: # %t
; CHECK64-NEXT:    movl $42, %eax
; CHECK64-NEXT:    popq %rbx
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB4_2: # %f
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    popq %rbx
; CHECK64-NEXT:    retq
entry:
  ; cmpxchg sets EFLAGS, call clobbers it, then br uses EFLAGS.
  %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
  %v = extractvalue { i64, i1 } %cx, 0
  %p = extractvalue { i64, i1 } %cx, 1
  call i32 @bar(i64 %v)
  br i1 %p, label %t, label %f

t:
  ret i64 42

f:
  ret i64 0
}

define i64 @test_two_live_flags(i64* %foo0, i64 %bar0, i64 %baz0, i64* %foo1, i64 %bar1, i64 %baz1) nounwind {
; CHECK32-LABEL: test_two_live_flags:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    pushl %ebp
; CHECK32-NEXT:    pushl %ebx
; CHECK32-NEXT:    pushl %edi
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    lock cmpxchg8b (%esi)
; CHECK32-NEXT:    setne {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl %edi, %edx
; CHECK32-NEXT:    movl %ebp, %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    lock cmpxchg8b (%esi)
; CHECK32-NEXT:    sete %al
; CHECK32-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
; CHECK32-NEXT:    jne .LBB5_4
; CHECK32-NEXT:  # %bb.1: # %entry
; CHECK32-NEXT:    testb %al, %al
; CHECK32-NEXT:    je .LBB5_4
; CHECK32-NEXT:  # %bb.2: # %t
; CHECK32-NEXT:    movl $42, %eax
; CHECK32-NEXT:    jmp .LBB5_3
; CHECK32-NEXT:  .LBB5_4: # %f
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:  .LBB5_3: # %t
; CHECK32-NEXT:    xorl %edx, %edx
; CHECK32-NEXT:    addl $4, %esp
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    popl %edi
; CHECK32-NEXT:    popl %ebx
; CHECK32-NEXT:    popl %ebp
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: test_two_live_flags:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movq %rsi, %rax
; CHECK64-NEXT:    lock cmpxchgq %rdx, (%rdi)
; CHECK64-NEXT:    setne %dl
; CHECK64-NEXT:    movq %r8, %rax
; CHECK64-NEXT:    lock cmpxchgq %r9, (%rcx)
; CHECK64-NEXT:    sete %al
; CHECK64-NEXT:    testb %dl, %dl
; CHECK64-NEXT:    jne .LBB5_3
; CHECK64-NEXT:  # %bb.1: # %entry
; CHECK64-NEXT:    testb %al, %al
; CHECK64-NEXT:    je .LBB5_3
; CHECK64-NEXT:  # %bb.2: # %t
; CHECK64-NEXT:    movl $42, %eax
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB5_3: # %f
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    retq
entry:
  ; Two cmpxchg success flags are live at once; one must be materialized
  ; into a register/spill slot rather than kept in EFLAGS.
  %cx0 = cmpxchg i64* %foo0, i64 %bar0, i64 %baz0 seq_cst seq_cst
  %p0 = extractvalue { i64, i1 } %cx0, 1
  %cx1 = cmpxchg i64* %foo1, i64 %bar1, i64 %baz1 seq_cst seq_cst
  %p1 = extractvalue { i64, i1 } %cx1, 1
  %flag = and i1 %p0, %p1
  br i1 %flag, label %t, label %f

t:
  ret i64 42

f:
  ret i64 0
}

define i1 @asm_clobbering_flags(i32* %mem) nounwind {
; CHECK32-LABEL: asm_clobbering_flags:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl (%ecx), %edx
; CHECK32-NEXT:    testl %edx, %edx
; CHECK32-NEXT:    setg %al
; CHECK32-NEXT:    #APP
; CHECK32-NEXT:    bsfl %edx, %edx
; CHECK32-NEXT:    #NO_APP
; CHECK32-NEXT:    movl %edx, (%ecx)
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: asm_clobbering_flags:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movl (%rdi), %ecx
; CHECK64-NEXT:    testl %ecx, %ecx
; CHECK64-NEXT:    setg %al
; CHECK64-NEXT:    #APP
; CHECK64-NEXT:    bsfl %ecx, %ecx
; CHECK64-NEXT:    #NO_APP
; CHECK64-NEXT:    movl %ecx, (%rdi)
; CHECK64-NEXT:    retq
entry:
  ; The inline asm declares ~{cc}/~{flags}, so the icmp result must be
  ; materialized (setg) before the asm, not recomputed after it.
  %val = load i32, i32* %mem, align 4
  %cmp = icmp sgt i32 %val, 0
  %res = tail call i32 asm "bsfl $1,$0", "=r,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i32 %val)
  store i32 %res, i32* %mem, align 4
  ret i1 %cmp
}
