; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X32
; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X64
;
; Test patterns that require preserving and restoring flags.

@b = common global i8 0, align 1
@c = common global i32 0, align 4
@a = common global i8 0, align 1
@d = common global i8 0, align 1
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

declare void @external(i32)

; A test that re-uses flags in interesting ways due to volatile accesses.
; Specifically, the first increment's flags are reused for the branch despite
; being clobbered by the second increment.
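;
; Rough C equivalent of the IR below (a reconstruction for illustration only;
; the variable names are assumptions, not taken from any original source):
;   char bv = b;  b = bv + 1;      /* volatile store of b   */
;   int  cv = c;  c = cv + 1;      /* volatile access to c  */
;   char av = a;  a = av + 1;      /* volatile access to a  */
;   d = (av == bv);
;   if (cv + 1 != 0)
;     external((int)(char)(bv + 1));
;   return 0;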
define i32 @test1() nounwind {
; X32-LABEL: test1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movb b, %cl
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    incb %al
; X32-NEXT:    movb %al, b
; X32-NEXT:    incl c
; X32-NEXT:    sete %dl
; X32-NEXT:    movb a, %ah
; X32-NEXT:    movb %ah, %ch
; X32-NEXT:    incb %ch
; X32-NEXT:    cmpb %cl, %ah
; X32-NEXT:    sete d
; X32-NEXT:    movb %ch, a
; X32-NEXT:    testb %dl, %dl
; X32-NEXT:    jne .LBB0_2
; X32-NEXT:  # %bb.1: # %if.then
; X32-NEXT:    movsbl %al, %eax
; X32-NEXT:    pushl %eax
; X32-NEXT:    calll external
; X32-NEXT:    addl $4, %esp
; X32-NEXT:  .LBB0_2: # %if.end
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movb {{.*}}(%rip), %dil
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    incb %al
; X64-NEXT:    movb %al, {{.*}}(%rip)
; X64-NEXT:    incl {{.*}}(%rip)
; X64-NEXT:    sete %sil
; X64-NEXT:    movb {{.*}}(%rip), %cl
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    incb %dl
; X64-NEXT:    cmpb %dil, %cl
; X64-NEXT:    sete {{.*}}(%rip)
; X64-NEXT:    movb %dl, {{.*}}(%rip)
; X64-NEXT:    testb %sil, %sil
; X64-NEXT:    jne .LBB0_2
; X64-NEXT:  # %bb.1: # %if.then
; X64-NEXT:    pushq %rax
; X64-NEXT:    movsbl %al, %edi
; X64-NEXT:    callq external
; X64-NEXT:    addq $8, %rsp
; X64-NEXT:  .LBB0_2: # %if.end
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
entry:
  %bval = load i8, i8* @b
  %inc = add i8 %bval, 1
  store volatile i8 %inc, i8* @b
  %cval = load volatile i32, i32* @c
  %inc1 = add nsw i32 %cval, 1
  store volatile i32 %inc1, i32* @c
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp = icmp eq i8 %aval, %bval
  %conv5 = zext i1 %cmp to i8
  store i8 %conv5, i8* @d
  %tobool = icmp eq i32 %inc1, 0
  br i1 %tobool, label %if.end, label %if.then

if.then:
  %conv6 = sext i8 %inc to i32
  call void @external(i32 %conv6)
  br label %if.end

if.end:
  ret i32 0
}

; Preserve increment flags across a call.
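; Rough C equivalent of the IR below (illustrative sketch only; the variable
; names are assumptions):
;   int inc = *ptr + 1;
;   *ptr = inc;
;   external(42);
;   return inc == 0 ? 64 : 0;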
define i32 @test2(i32* %ptr) nounwind {
; X32-LABEL: test2:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incl (%eax)
; X32-NEXT:    setne %bl
; X32-NEXT:    pushl $42
; X32-NEXT:    calll external
; X32-NEXT:    addl $4, %esp
; X32-NEXT:    testb %bl, %bl
; X32-NEXT:    je .LBB1_1
; X32-NEXT:  # %bb.2: # %else
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
; X32-NEXT:  .LBB1_1: # %then
; X32-NEXT:    movl $64, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %rbx
; X64-NEXT:    incl (%rdi)
; X64-NEXT:    setne %bl
; X64-NEXT:    movl $42, %edi
; X64-NEXT:    callq external
; X64-NEXT:    testb %bl, %bl
; X64-NEXT:    je .LBB1_1
; X64-NEXT:  # %bb.2: # %else
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
; X64-NEXT:  .LBB1_1: # %then
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  call void @external(i32 42)
  br i1 %cmp, label %then, label %else

then:
  ret i32 64

else:
  ret i32 0
}

declare void @external_a()
declare void @external_b()

; This lowers to a conditional tail call instead of a conditional branch. This
; is tricky because we can only do this from a leaf function, and so we have to
; use volatile stores similar to test1 to force the save and restore of
; a condition without calling another function. We then set up subsequent calls
; in tail position.
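;
; Rough C equivalent of the IR below (a sketch for illustration; names are
; assumptions, not from any original source):
;   int inc = *ptr + 1;
;   *ptr = inc;
;   d = (++a == 0);               /* volatile access to 'a' */
;   if (inc == 0) external_a();   /* tail call */
;   else          external_b();   /* tail call */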
define void @test_tail_call(i32* %ptr) nounwind optsize {
; X32-LABEL: test_tail_call:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incl (%eax)
; X32-NEXT:    setne %al
; X32-NEXT:    incb a
; X32-NEXT:    sete d
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne external_b # TAILCALL
; X32-NEXT:  # %bb.1: # %then
; X32-NEXT:    jmp external_a # TAILCALL
;
; X64-LABEL: test_tail_call:
; X64:       # %bb.0: # %entry
; X64-NEXT:    incl (%rdi)
; X64-NEXT:    setne %al
; X64-NEXT:    incb {{.*}}(%rip)
; X64-NEXT:    sete {{.*}}(%rip)
; X64-NEXT:    testb %al, %al
; X64-NEXT:    jne external_b # TAILCALL
; X64-NEXT:  # %bb.1: # %then
; X64-NEXT:    jmp external_a # TAILCALL
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp2 = icmp eq i8 %inc2, 0
  %conv5 = zext i1 %cmp2 to i8
  store i8 %conv5, i8* @d
  br i1 %cmp, label %then, label %else

then:
  tail call void @external_a()
  ret void

else:
  tail call void @external_b()
  ret void
}

; Test a function that gets special select lowering into CFG with copied EFLAGS
; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
; cross-block rewrites in at least some narrow cases.
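;
; Note (added commentary, read from the IR below rather than from the original
; bug report): a single compare result (%tmp6) feeds two selects (%tmp8 and
; %tmp10) with a volatile store and load in between, all inside a loop, so the
; flag value has to survive across the blocks created when the selects are
; expanded into control flow.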
define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2) {
; X32-LABEL: PR37100:
; X32:       # %bb.0: # %bb
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    pushl %ebx
; X32-NEXT:    .cfi_def_cfa_offset 12
; X32-NEXT:    pushl %edi
; X32-NEXT:    .cfi_def_cfa_offset 16
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 20
; X32-NEXT:    .cfi_offset %esi, -20
; X32-NEXT:    .cfi_offset %edi, -16
; X32-NEXT:    .cfi_offset %ebx, -12
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT:    movb {{[0-9]+}}(%esp), %ch
; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-NEXT:    jmp .LBB3_1
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  .LBB3_5: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    idivl %ebp
; X32-NEXT:  .LBB3_1: # %bb1
; X32-NEXT:    # =>This Inner Loop Header: Depth=1
; X32-NEXT:    movsbl %cl, %eax
; X32-NEXT:    movl %eax, %edx
; X32-NEXT:    sarl $31, %edx
; X32-NEXT:    cmpl %eax, %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    sbbl %edx, %eax
; X32-NEXT:    setl %al
; X32-NEXT:    setl %dl
; X32-NEXT:    movzbl %dl, %ebp
; X32-NEXT:    negl %ebp
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne .LBB3_3
; X32-NEXT:  # %bb.2: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %ch, %cl
; X32-NEXT:  .LBB3_3: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %cl, (%ebx)
; X32-NEXT:    movl (%edi), %edx
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne .LBB3_5
; X32-NEXT:  # %bb.4: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movl %edx, %ebp
; X32-NEXT:    jmp .LBB3_5
;
; X64-LABEL: PR37100:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq %rdx, %r10
; X64-NEXT:    jmp .LBB3_1
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB3_5: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    idivl %esi
; X64-NEXT:  .LBB3_1: # %bb1
; X64-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-NEXT:    movsbq %dil, %rax
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    cmpq %rax, %r10
; X64-NEXT:    setl %sil
; X64-NEXT:    negl %esi
; X64-NEXT:    cmpq %rax, %r10
; X64-NEXT:    jl .LBB3_3
; X64-NEXT:  # %bb.2: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    movl %ecx, %edi
; X64-NEXT:  .LBB3_3: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    movb %dil, (%r8)
; X64-NEXT:    jl .LBB3_5
; X64-NEXT:  # %bb.4: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    movl (%r9), %esi
; X64-NEXT:    jmp .LBB3_5
bb:
  br label %bb1

bb1:
  %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
  %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
  %tmp3 = icmp sgt i16 %tmp2, 7
  %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
  %tmp5 = sext i8 %tmp to i64
  %tmp6 = icmp slt i64 %arg3, %tmp5
  %tmp7 = sext i1 %tmp6 to i32
  %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
  store volatile i8 %tmp8, i8* %ptr1
  %tmp9 = load volatile i32, i32* %ptr2
  %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
  %tmp11 = srem i32 0, %tmp10
  %tmp12 = trunc i32 %tmp11 to i16
  br label %bb1
}

; Use a particular instruction pattern in order to lower to the post-RA pseudo
; used to lower SETB into an SBB pattern in order to make sure that kind of
; usage of a copied EFLAGS continues to work.
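;
; Note (added commentary): the pattern in question appears to be the
; `0 - zext(icmp ugt ...)` idiom below; in the generated code it shows up as
; the `sbbb` that turns the (copied) carry flag into a 0/-1 byte value.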
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
; X32-LABEL: PR37431:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %esi, -8
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %eax
; X32-NEXT:    movl %eax, %ecx
; X32-NEXT:    sarl $31, %ecx
; X32-NEXT:    cmpl %eax, %eax
; X32-NEXT:    sbbl %ecx, %eax
; X32-NEXT:    setb %al
; X32-NEXT:    sbbb %cl, %cl
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movb %cl, (%edx)
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    xorl %ecx, %ecx
; X32-NEXT:    subl %eax, %ecx
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    idivl %ecx
; X32-NEXT:    movb %dl, (%esi)
; X32-NEXT:    popl %esi
; X32-NEXT:    .cfi_def_cfa_offset 4
; X32-NEXT:    retl
;
; X64-LABEL: PR37431:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %rcx
; X64-NEXT:    movslq (%rdi), %rax
; X64-NEXT:    cmpq %rax, %rax
; X64-NEXT:    sbbb %dl, %dl
; X64-NEXT:    cmpq %rax, %rax
; X64-NEXT:    movb %dl, (%rsi)
; X64-NEXT:    sbbl %esi, %esi
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    idivl %esi
; X64-NEXT:    movb %dl, (%rcx)
; X64-NEXT:    retq
entry:
  %tmp = load i32, i32* %arg1
  %tmp1 = sext i32 %tmp to i64
  %tmp2 = icmp ugt i64 %tmp1, undef
  %tmp3 = zext i1 %tmp2 to i8
  %tmp4 = sub i8 0, %tmp3
  store i8 %tmp4, i8* %arg2
  %tmp5 = sext i8 %tmp4 to i32
  %tmp6 = srem i32 0, %tmp5
  %tmp7 = trunc i32 %tmp6 to i8
  store i8 %tmp7, i8* %arg3
  ret void
}