; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefix=X32
; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefix=X64
;
; Test patterns that require preserving and restoring flags.

@b = common global i8 0, align 1
@c = common global i32 0, align 4
@a = common global i8 0, align 1
@d = common global i8 0, align 1
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

declare dso_local void @external(i32)

; A test that re-uses flags in interesting ways due to volatile accesses.
; Specifically, the first increment's flags are reused for the branch despite
; being clobbered by the second increment.
define i32 @test1() nounwind {
; X32-LABEL: test1:
; X32: # %bb.0: # %entry
; X32-NEXT: movb b, %cl
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: incb %al
; X32-NEXT: movb %al, b
; X32-NEXT: incl c
; X32-NEXT: sete %dl
; X32-NEXT: movb a, %ah
; X32-NEXT: movb %ah, %ch
; X32-NEXT: incb %ch
; X32-NEXT: cmpb %cl, %ah
; X32-NEXT: sete d
; X32-NEXT: movb %ch, a
; X32-NEXT: testb %dl, %dl
; X32-NEXT: jne .LBB0_2
; X32-NEXT: # %bb.1: # %if.then
; X32-NEXT: movsbl %al, %eax
; X32-NEXT: pushl %eax
; X32-NEXT: calll external
; X32-NEXT: addl $4, %esp
; X32-NEXT: .LBB0_2: # %if.end
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: test1:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rax
; X64-NEXT: movb {{.*}}(%rip), %cl
; X64-NEXT: leal 1(%rcx), %eax
; X64-NEXT: movb %al, {{.*}}(%rip)
; X64-NEXT: incl {{.*}}(%rip)
; X64-NEXT: sete %dl
; X64-NEXT: movb {{.*}}(%rip), %sil
; X64-NEXT: leal 1(%rsi), %edi
; X64-NEXT: cmpb %cl, %sil
; X64-NEXT: sete {{.*}}(%rip)
; X64-NEXT: movb %dil, {{.*}}(%rip)
; X64-NEXT: testb %dl, %dl
; X64-NEXT: jne .LBB0_2
; X64-NEXT: # %bb.1: # %if.then
; X64-NEXT: movsbl %al, %edi
; X64-NEXT: callq external
; X64-NEXT: .LBB0_2: # %if.end
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
entry:
  %bval = load i8, i8* @b
  %inc = add i8 %bval, 1
  store volatile i8 %inc, i8* @b
  %cval = load volatile i32, i32* @c
  %inc1 = add nsw i32 %cval, 1
  store volatile i32 %inc1, i32* @c
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp = icmp eq i8 %aval, %bval
  %conv5 = zext i1 %cmp to i8
  store i8 %conv5, i8* @d
  %tobool = icmp eq i32 %inc1, 0
  br i1 %tobool, label %if.end, label %if.then

if.then:
  %conv6 = sext i8 %inc to i32
  call void @external(i32 %conv6)
  br label %if.end

if.end:
  ret i32 0
}

; Preserve increment flags across a call.
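; The flags from the increment cannot survive in EFLAGS across the call to
; @external, so the checked output materializes the condition into the
; callee-saved %bl with setne before the call and re-tests it with testb
; afterwards.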
define i32 @test2(i32* %ptr) nounwind {
; X32-LABEL: test2:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incl (%eax)
; X32-NEXT: setne %bl
; X32-NEXT: pushl $42
; X32-NEXT: calll external
; X32-NEXT: addl $4, %esp
; X32-NEXT: testb %bl, %bl
; X32-NEXT: jne .LBB1_2
; X32-NEXT: # %bb.1: # %then
; X32-NEXT: movl $64, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: retl
; X32-NEXT: .LBB1_2: # %else
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: test2:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbx
; X64-NEXT: incl (%rdi)
; X64-NEXT: setne %bl
; X64-NEXT: movl $42, %edi
; X64-NEXT: callq external
; X64-NEXT: testb %bl, %bl
; X64-NEXT: jne .LBB1_2
; X64-NEXT: # %bb.1: # %then
; X64-NEXT: movl $64, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
; X64-NEXT: .LBB1_2: # %else
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  call void @external(i32 42)
  br i1 %cmp, label %then, label %else

then:
  ret i32 64

else:
  ret i32 0
}

declare dso_local void @external_a()
declare dso_local void @external_b()

; This lowers to a conditional tail call instead of a conditional branch. This
; is tricky because we can only do this from a leaf function, and so we have to
; use volatile stores similar to test1 to force the save and restore of
; a condition without calling another function. We then set up subsequent calls
; in tail position.
define void @test_tail_call(i32* %ptr) nounwind optsize {
; X32-LABEL: test_tail_call:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incl (%eax)
; X32-NEXT: setne %al
; X32-NEXT: incb a
; X32-NEXT: sete d
; X32-NEXT: testb %al, %al
; X32-NEXT: jne external_b # TAILCALL
; X32-NEXT: # %bb.1: # %then
; X32-NEXT: jmp external_a # TAILCALL
;
; X64-LABEL: test_tail_call:
; X64: # %bb.0: # %entry
; X64-NEXT: incl (%rdi)
; X64-NEXT: setne %al
; X64-NEXT: incb {{.*}}(%rip)
; X64-NEXT: sete {{.*}}(%rip)
; X64-NEXT: testb %al, %al
; X64-NEXT: jne external_b # TAILCALL
; X64-NEXT: # %bb.1: # %then
; X64-NEXT: jmp external_a # TAILCALL
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp2 = icmp eq i8 %inc2, 0
  %conv5 = zext i1 %cmp2 to i8
  store i8 %conv5, i8* @d
  br i1 %cmp, label %then, label %else

then:
  tail call void @external_a()
  ret void

else:
  tail call void @external_b()
  ret void
}

; Test a function that gets special select lowering into CFG with copied EFLAGS
; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
; cross-block rewrites in at least some narrow cases.
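; In the 32-bit output below the compare result is materialized into byte
; registers with setl and re-tested with testb in the later blocks of the loop
; body, rather than copying EFLAGS itself between blocks.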
define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2, i32 %x) nounwind {
; X32-LABEL: PR37100:
; X32: # %bb.0: # %bb
; X32-NEXT: pushl %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X32-NEXT: movb {{[0-9]+}}(%esp), %ch
; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-NEXT: jmp .LBB3_1
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB3_5: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movl %esi, %eax
; X32-NEXT: cltd
; X32-NEXT: idivl %edi
; X32-NEXT: .LBB3_1: # %bb1
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movsbl %cl, %eax
; X32-NEXT: movl %eax, %edx
; X32-NEXT: sarl $31, %edx
; X32-NEXT: cmpl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: sbbl %edx, %eax
; X32-NEXT: setl %al
; X32-NEXT: setl %dl
; X32-NEXT: movzbl %dl, %edi
; X32-NEXT: negl %edi
; X32-NEXT: testb %al, %al
; X32-NEXT: jne .LBB3_3
; X32-NEXT: # %bb.2: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movb %ch, %cl
; X32-NEXT: .LBB3_3: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movb %cl, (%ebp)
; X32-NEXT: movl (%ebx), %edx
; X32-NEXT: testb %al, %al
; X32-NEXT: jne .LBB3_5
; X32-NEXT: # %bb.4: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movl %edx, %edi
; X32-NEXT: jmp .LBB3_5
;
; X64-LABEL: PR37100:
; X64: # %bb.0: # %bb
; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: movl {{[0-9]+}}(%rsp), %r10d
; X64-NEXT: movzbl %cl, %r11d
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB3_1: # %bb1
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movsbq %dil, %rax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpq %rax, %rsi
; X64-NEXT: setl %cl
; X64-NEXT: negl %ecx
; X64-NEXT: cmpq %rax, %rsi
; X64-NEXT: movzbl %al, %edi
; X64-NEXT: cmovgel %r11d, %edi
; X64-NEXT: movb %dil, (%r8)
; X64-NEXT: cmovgel (%r9), %ecx
; X64-NEXT: movl %r10d, %eax
; X64-NEXT: cltd
; X64-NEXT: idivl %ecx
; X64-NEXT: jmp .LBB3_1
bb:
  br label %bb1

bb1:
  %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
  %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
  %tmp3 = icmp sgt i16 %tmp2, 7
  %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
  %tmp5 = sext i8 %tmp to i64
  %tmp6 = icmp slt i64 %arg3, %tmp5
  %tmp7 = sext i1 %tmp6 to i32
  %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
  store volatile i8 %tmp8, i8* %ptr1
  %tmp9 = load volatile i32, i32* %ptr2
  %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
  %tmp11 = srem i32 %x, %tmp10
  %tmp12 = trunc i32 %tmp11 to i16
  br label %bb1
}

; Use a particular instruction pattern that lowers to the post-RA pseudo used
; to turn SETB into an SBB pattern, to make sure that kind of use of a copied
; EFLAGS continues to work.
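; In the checked output this shows up as the same-register sbb idiom
; (e.g. sbbl %ebx, %ebx), which materializes the carry flag as 0 or -1.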
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64 %arg5) nounwind {
; X32-LABEL: PR37431:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: movl (%edi), %edi
; X32-NEXT: movl %edi, %ebx
; X32-NEXT: sarl $31, %ebx
; X32-NEXT: cmpl %edi, {{[0-9]+}}(%esp)
; X32-NEXT: sbbl %ebx, %esi
; X32-NEXT: sbbl %ebx, %ebx
; X32-NEXT: movb %bl, (%edx)
; X32-NEXT: cltd
; X32-NEXT: idivl %ebx
; X32-NEXT: movb %dl, (%ecx)
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: PR37431:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movslq (%rdi), %rdx
; X64-NEXT: cmpq %rdx, %r8
; X64-NEXT: sbbl %edi, %edi
; X64-NEXT: movb %dil, (%rsi)
; X64-NEXT: cltd
; X64-NEXT: idivl %edi
; X64-NEXT: movb %dl, (%rcx)
; X64-NEXT: retq
entry:
  %tmp = load i32, i32* %arg1
  %tmp1 = sext i32 %tmp to i64
  %tmp2 = icmp ugt i64 %tmp1, %arg5
  %tmp3 = zext i1 %tmp2 to i8
  %tmp4 = sub i8 0, %tmp3
  store i8 %tmp4, i8* %arg2
  %tmp5 = sext i8 %tmp4 to i32
  %tmp6 = srem i32 %arg4, %tmp5
  %tmp7 = trunc i32 %tmp6 to i8
  store i8 %tmp7, i8* %arg3
  ret void
}