; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefix=X32
; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefix=X64
;
; Test patterns that require preserving and restoring flags.

@b = common global i8 0, align 1
@c = common global i32 0, align 4
@a = common global i8 0, align 1
@d = common global i8 0, align 1
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

declare dso_local void @external(i32)

; A test that re-uses flags in interesting ways due to volatile accesses.
; Specifically, the first increment's flags are reused for the branch despite
; being clobbered by the second increment.
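;
; In the expected output the branch condition is not kept live in EFLAGS across
; the clobbering instructions; it is captured into a register with a setcc right
; after the increment that produces it (sete %dl) and rematerialized with a
; testb just before the branch. A rough C equivalent of the pattern, using the
; globals and @external declared above (an illustrative sketch only, with local
; names of my choosing, not necessarily the exact source this was reduced from):
;
;   int test1(void) {
;     char b0 = b;  char b1 = b0 + 1;  b = b1;
;     int  c1 = c + 1;  c = c1;   /* the branch needs ZF from this increment */
;     char a0 = a;  a = a0 + 1;   /* ...which this later increment clobbers */
;     d = (a0 == b0);
;     if (c1 != 0)
;       external((int)b1);
;     return 0;
;   }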
define i32 @test1() nounwind {
; X32-LABEL: test1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movb b, %cl
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    incb %al
; X32-NEXT:    movb %al, b
; X32-NEXT:    incl c
; X32-NEXT:    sete %dl
; X32-NEXT:    movb a, %ah
; X32-NEXT:    movb %ah, %ch
; X32-NEXT:    incb %ch
; X32-NEXT:    cmpb %cl, %ah
; X32-NEXT:    sete d
; X32-NEXT:    movb %ch, a
; X32-NEXT:    testb %dl, %dl
; X32-NEXT:    jne .LBB0_2
; X32-NEXT:  # %bb.1: # %if.then
; X32-NEXT:    movsbl %al, %eax
; X32-NEXT:    pushl %eax
; X32-NEXT:    calll external
; X32-NEXT:    addl $4, %esp
; X32-NEXT:  .LBB0_2: # %if.end
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %rax
; X64-NEXT:    movb {{.*}}(%rip), %cl
; X64-NEXT:    leal 1(%rcx), %eax
; X64-NEXT:    movb %al, {{.*}}(%rip)
; X64-NEXT:    incl {{.*}}(%rip)
; X64-NEXT:    sete %dl
; X64-NEXT:    movb {{.*}}(%rip), %sil
; X64-NEXT:    leal 1(%rsi), %edi
; X64-NEXT:    cmpb %cl, %sil
; X64-NEXT:    sete {{.*}}(%rip)
; X64-NEXT:    movb %dil, {{.*}}(%rip)
; X64-NEXT:    testb %dl, %dl
; X64-NEXT:    jne .LBB0_2
; X64-NEXT:  # %bb.1: # %if.then
; X64-NEXT:    movsbl %al, %edi
; X64-NEXT:    callq external
; X64-NEXT:  .LBB0_2: # %if.end
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    popq %rcx
; X64-NEXT:    retq
entry:
  %bval = load i8, i8* @b
  %inc = add i8 %bval, 1
  store volatile i8 %inc, i8* @b
  %cval = load volatile i32, i32* @c
  %inc1 = add nsw i32 %cval, 1
  store volatile i32 %inc1, i32* @c
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp = icmp eq i8 %aval, %bval
  %conv5 = zext i1 %cmp to i8
  store i8 %conv5, i8* @d
  %tobool = icmp eq i32 %inc1, 0
  br i1 %tobool, label %if.end, label %if.then

if.then:
  %conv6 = sext i8 %inc to i32
  call void @external(i32 %conv6)
  br label %if.end

if.end:
  ret i32 0
}

; Preserve increment flags across a call.
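; The expected output captures ZF into a callee-saved register with a setcc
; (setne %bl) before the call and re-tests it afterwards (testb %bl, %bl)
; rather than trying to keep EFLAGS live across the call. A rough C equivalent
; (an illustrative sketch only, with local names of my choosing):
;
;   int test2(int *ptr) {
;     int inc = *ptr + 1;
;     *ptr = inc;
;     int iszero = (inc == 0);   /* the flags produced here... */
;     external(42);              /* ...must survive this call */
;     return iszero ? 64 : 0;
;   }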
define i32 @test2(i32* %ptr) nounwind {
; X32-LABEL: test2:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incl (%eax)
; X32-NEXT:    setne %bl
; X32-NEXT:    pushl $42
; X32-NEXT:    calll external
; X32-NEXT:    addl $4, %esp
; X32-NEXT:    testb %bl, %bl
; X32-NEXT:    jne .LBB1_2
; X32-NEXT:  # %bb.1: # %then
; X32-NEXT:    movl $64, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
; X32-NEXT:  .LBB1_2: # %else
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %rbx
; X64-NEXT:    incl (%rdi)
; X64-NEXT:    setne %bl
; X64-NEXT:    movl $42, %edi
; X64-NEXT:    callq external
; X64-NEXT:    testb %bl, %bl
; X64-NEXT:    jne .LBB1_2
; X64-NEXT:  # %bb.1: # %then
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
; X64-NEXT:  .LBB1_2: # %else
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  call void @external(i32 42)
  br i1 %cmp, label %then, label %else

then:
  ret i32 64

else:
  ret i32 0
}

declare dso_local void @external_a()
declare dso_local void @external_b()

; This lowers to a conditional tail call instead of a conditional branch. This
; is tricky because we can only do this from a leaf function, and so we have to
; use volatile stores similar to those in test1 to force the save and restore
; of a condition without calling another function. We then set up subsequent
; calls in tail position.
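;
; In the expected output the first increment's condition is saved with a setcc
; into %al, the volatile increment of @a then clobbers EFLAGS (incb/sete), and
; the saved value is re-tested with testb to drive the conditional tail call
; (jne external_b # TAILCALL). A rough C equivalent (an illustrative sketch
; only, with local names of my choosing):
;
;   void test_tail_call(int *ptr) {
;     int inc = *ptr + 1;  *ptr = inc;
;     int iszero = (inc == 0);     /* saved across the EFLAGS clobber below */
;     d = (++a == 0);              /* volatile increment of the global @a */
;     if (iszero) external_a(); else external_b();  /* both in tail position */
;   }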
define void @test_tail_call(i32* %ptr) nounwind optsize {
; X32-LABEL: test_tail_call:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incl (%eax)
; X32-NEXT:    setne %al
; X32-NEXT:    incb a
; X32-NEXT:    sete d
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne external_b # TAILCALL
; X32-NEXT:  # %bb.1: # %then
; X32-NEXT:    jmp external_a # TAILCALL
;
; X64-LABEL: test_tail_call:
; X64:       # %bb.0: # %entry
; X64-NEXT:    incl (%rdi)
; X64-NEXT:    setne %al
; X64-NEXT:    incb {{.*}}(%rip)
; X64-NEXT:    sete {{.*}}(%rip)
; X64-NEXT:    testb %al, %al
; X64-NEXT:    jne external_b # TAILCALL
; X64-NEXT:  # %bb.1: # %then
; X64-NEXT:    jmp external_a # TAILCALL
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp2 = icmp eq i8 %inc2, 0
  %conv5 = zext i1 %cmp2 to i8
  store i8 %conv5, i8* @d
  br i1 %cmp, label %then, label %else

then:
  tail call void @external_a()
  ret void

else:
  tail call void @external_b()
  ret void
}

; Test a function that gets special select lowering into CFG with copied EFLAGS
; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
; cross-block rewrites in at least some narrow cases.
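;
; In the expected 32-bit output the selects become a branch diamond inside the
; loop: the signed compare result is materialized twice with setcc (setl %al /
; setl %dl) so that it can be re-tested with testb in later blocks of the same
; iteration, instead of keeping EFLAGS live across the intervening code. The
; 64-bit output simply repeats the cmpq and consumes the flags locally with
; setl and cmovgel.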
define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2, i32 %x) nounwind {
; X32-LABEL: PR37100:
; X32:       # %bb.0: # %bb
; X32-NEXT:    pushl %ebp
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X32-NEXT:    movb {{[0-9]+}}(%esp), %ch
; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-NEXT:    jmp .LBB3_1
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  .LBB3_5: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movl %esi, %eax
; X32-NEXT:    cltd
; X32-NEXT:    idivl %edi
; X32-NEXT:  .LBB3_1: # %bb1
; X32-NEXT:    # =>This Inner Loop Header: Depth=1
; X32-NEXT:    movsbl %cl, %eax
; X32-NEXT:    movl %eax, %edx
; X32-NEXT:    sarl $31, %edx
; X32-NEXT:    cmpl %eax, {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    sbbl %edx, %eax
; X32-NEXT:    setl %al
; X32-NEXT:    setl %dl
; X32-NEXT:    movzbl %dl, %edi
; X32-NEXT:    negl %edi
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne .LBB3_3
; X32-NEXT:  # %bb.2: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %ch, %cl
; X32-NEXT:  .LBB3_3: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %cl, (%ebp)
; X32-NEXT:    movl (%ebx), %edx
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne .LBB3_5
; X32-NEXT:  # %bb.4: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movl %edx, %edi
; X32-NEXT:    jmp .LBB3_5
;
; X64-LABEL: PR37100:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq %rdx, %rsi
; X64-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
; X64-NEXT:    movzbl %cl, %r11d
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB3_1: # %bb1
; X64-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-NEXT:    movsbq %dil, %rax
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    cmpq %rax, %rsi
; X64-NEXT:    setl %cl
; X64-NEXT:    negl %ecx
; X64-NEXT:    cmpq %rax, %rsi
; X64-NEXT:    movzbl %al, %edi
; X64-NEXT:    cmovgel %r11d, %edi
; X64-NEXT:    movb %dil, (%r8)
; X64-NEXT:    cmovgel (%r9), %ecx
; X64-NEXT:    movl %r10d, %eax
; X64-NEXT:    cltd
; X64-NEXT:    idivl %ecx
; X64-NEXT:    jmp .LBB3_1
bb:
  br label %bb1

bb1:
  %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
  %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
  %tmp3 = icmp sgt i16 %tmp2, 7
  %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
  %tmp5 = sext i8 %tmp to i64
  %tmp6 = icmp slt i64 %arg3, %tmp5
  %tmp7 = sext i1 %tmp6 to i32
  %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
  store volatile i8 %tmp8, i8* %ptr1
  %tmp9 = load volatile i32, i32* %ptr2
  %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
  %tmp11 = srem i32 %x, %tmp10
  %tmp12 = trunc i32 %tmp11 to i16
  br label %bb1
}

; Use a particular instruction pattern that lowers to the post-RA pseudo used
; to turn SETB into an SBB, in order to make sure that this kind of use of a
; copied EFLAGS continues to work.
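;
; In the expected output the zext(icmp ugt)/sub pattern becomes a compare
; followed by an SBB of a register with itself (sbbl %ebx, %ebx / sbbl
; %edi, %edi), which materializes 0 or -1 from the carry flag. A rough C
; equivalent (an illustrative sketch only, assuming plain char is signed and
; with local names of my choosing):
;
;   void PR37431(int *arg1, char *arg2, char *arg3, int arg4,
;                unsigned long long arg5) {
;     long long t = *arg1;                                   /* sign-extended load */
;     char borrow = ((unsigned long long)t > arg5) ? -1 : 0; /* setb/sbb pattern */
;     *arg2 = borrow;
;     *arg3 = (char)(arg4 % (int)borrow);  /* srem, possibly by zero as in the IR */
;   }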
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64 %arg5) nounwind {
; X32-LABEL: PR37431:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    movl (%edi), %edi
; X32-NEXT:    movl %edi, %ebx
; X32-NEXT:    sarl $31, %ebx
; X32-NEXT:    cmpl %edi, {{[0-9]+}}(%esp)
; X32-NEXT:    sbbl %ebx, %esi
; X32-NEXT:    sbbl %ebx, %ebx
; X32-NEXT:    movb %bl, (%edx)
; X32-NEXT:    cltd
; X32-NEXT:    idivl %ebx
; X32-NEXT:    movb %dl, (%ecx)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: PR37431:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    movq %rdx, %rcx
; X64-NEXT:    movslq (%rdi), %rdx
; X64-NEXT:    cmpq %rdx, %r8
; X64-NEXT:    sbbl %edi, %edi
; X64-NEXT:    movb %dil, (%rsi)
; X64-NEXT:    cltd
; X64-NEXT:    idivl %edi
; X64-NEXT:    movb %dl, (%rcx)
; X64-NEXT:    retq
entry:
  %tmp = load i32, i32* %arg1
  %tmp1 = sext i32 %tmp to i64
  %tmp2 = icmp ugt i64 %tmp1, %arg5
  %tmp3 = zext i1 %tmp2 to i8
  %tmp4 = sub i8 0, %tmp3
  store i8 %tmp4, i8* %arg2
  %tmp5 = sext i8 %tmp4 to i32
  %tmp6 = srem i32 %arg4, %tmp5
  %tmp7 = trunc i32 %tmp6 to i8
  store i8 %tmp7, i8* %arg3
  ret void
}