; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X32
; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X64
;
; Test patterns that require preserving and restoring flags.

@b = common global i8 0, align 1
@c = common global i32 0, align 4
@a = common global i8 0, align 1
@d = common global i8 0, align 1
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

declare void @external(i32)

; A test that re-uses flags in interesting ways due to volatile accesses.
; Specifically, the first increment's flags are reused for the branch despite
; being clobbered by the second increment.
define i32 @test1() nounwind {
; X32-LABEL: test1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movb b, %cl
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    incb %al
; X32-NEXT:    movb %al, b
; X32-NEXT:    incl c
; X32-NEXT:    sete %dl
; X32-NEXT:    movb a, %ah
; X32-NEXT:    movb %ah, %ch
; X32-NEXT:    incb %ch
; X32-NEXT:    cmpb %cl, %ah
; X32-NEXT:    sete d
; X32-NEXT:    movb %ch, a
; X32-NEXT:    testb %dl, %dl
; X32-NEXT:    jne .LBB0_2
; X32-NEXT:  # %bb.1: # %if.then
; X32-NEXT:    movsbl %al, %eax
; X32-NEXT:    pushl %eax
; X32-NEXT:    calll external
; X32-NEXT:    addl $4, %esp
; X32-NEXT:  .LBB0_2: # %if.end
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movb {{.*}}(%rip), %dil
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    incb %al
; X64-NEXT:    movb %al, {{.*}}(%rip)
; X64-NEXT:    incl {{.*}}(%rip)
; X64-NEXT:    sete %sil
; X64-NEXT:    movb {{.*}}(%rip), %cl
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    incb %dl
; X64-NEXT:    cmpb %dil, %cl
; X64-NEXT:    sete {{.*}}(%rip)
; X64-NEXT:    movb %dl, {{.*}}(%rip)
; X64-NEXT:    testb %sil, %sil
; X64-NEXT:    jne .LBB0_2
; X64-NEXT:  # %bb.1: # %if.then
; X64-NEXT:    pushq %rax
; X64-NEXT:    movsbl %al, %edi
; X64-NEXT:    callq external
; X64-NEXT:    addq $8, %rsp
; X64-NEXT:  .LBB0_2: # %if.end
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
entry:
  %bval = load i8, i8* @b
  %inc = add i8 %bval, 1
  store volatile i8 %inc, i8* @b
  %cval = load volatile i32, i32* @c
  %inc1 = add nsw i32 %cval, 1
  store volatile i32 %inc1, i32* @c
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp = icmp eq i8 %aval, %bval
  %conv5 = zext i1 %cmp to i8
  store i8 %conv5, i8* @d
  %tobool = icmp eq i32 %inc1, 0
  br i1 %tobool, label %if.end, label %if.then

if.then:
  %conv6 = sext i8 %inc to i32
  call void @external(i32 %conv6)
  br label %if.end

if.end:
  ret i32 0
}

; Preserve increment flags across a call.
define i32 @test2(i32* %ptr) nounwind {
; X32-LABEL: test2:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incl (%eax)
; X32-NEXT:    setne %bl
; X32-NEXT:    pushl $42
; X32-NEXT:    calll external
; X32-NEXT:    addl $4, %esp
; X32-NEXT:    testb %bl, %bl
; X32-NEXT:    je .LBB1_1
; X32-NEXT:  # %bb.2: # %else
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
; X32-NEXT:  .LBB1_1: # %then
; X32-NEXT:    movl $64, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %rbx
; X64-NEXT:    incl (%rdi)
; X64-NEXT:    setne %bl
; X64-NEXT:    movl $42, %edi
; X64-NEXT:    callq external
; X64-NEXT:    testb %bl, %bl
; X64-NEXT:    je .LBB1_1
; X64-NEXT:  # %bb.2: # %else
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
; X64-NEXT:  .LBB1_1: # %then
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  call void @external(i32 42)
  br i1 %cmp, label %then, label %else

then:
  ret i32 64

else:
  ret i32 0
}

declare void @external_a()
declare void @external_b()

; This lowers to a conditional tail call instead of a conditional branch. This
; is tricky because we can only do this from a leaf function, and so we have to
; use volatile stores similar to test1 to force the save and restore of
; a condition without calling another function. We then set up subsequent calls
; in tail position.
define void @test_tail_call(i32* %ptr) nounwind optsize {
; X32-LABEL: test_tail_call:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incl (%eax)
; X32-NEXT:    setne %al
; X32-NEXT:    incb a
; X32-NEXT:    sete d
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne external_b # TAILCALL
; X32-NEXT:  # %bb.1: # %then
; X32-NEXT:    jmp external_a # TAILCALL
;
; X64-LABEL: test_tail_call:
; X64:       # %bb.0: # %entry
; X64-NEXT:    incl (%rdi)
; X64-NEXT:    setne %al
; X64-NEXT:    incb {{.*}}(%rip)
; X64-NEXT:    sete {{.*}}(%rip)
; X64-NEXT:    testb %al, %al
; X64-NEXT:    jne external_b # TAILCALL
; X64-NEXT:  # %bb.1: # %then
; X64-NEXT:    jmp external_a # TAILCALL
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp2 = icmp eq i8 %inc2, 0
  %conv5 = zext i1 %cmp2 to i8
  store i8 %conv5, i8* @d
  br i1 %cmp, label %then, label %else

then:
  tail call void @external_a()
  ret void

else:
  tail call void @external_b()
  ret void
}

; Test a function that gets special select lowering into CFG with copied EFLAGS
; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
; cross-block rewrites in at least some narrow cases.
define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2) {
; X32-LABEL: PR37100:
; X32:       # %bb.0: # %bb
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    pushl %ebx
; X32-NEXT:    .cfi_def_cfa_offset 12
; X32-NEXT:    pushl %edi
; X32-NEXT:    .cfi_def_cfa_offset 16
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 20
; X32-NEXT:    .cfi_offset %esi, -20
; X32-NEXT:    .cfi_offset %edi, -16
; X32-NEXT:    .cfi_offset %ebx, -12
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT:    movb {{[0-9]+}}(%esp), %ch
; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-NEXT:    jmp .LBB3_1
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  .LBB3_5: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    idivl %ebp
; X32-NEXT:  .LBB3_1: # %bb1
; X32-NEXT:    # =>This Inner Loop Header: Depth=1
; X32-NEXT:    movsbl %cl, %eax
; X32-NEXT:    movl %eax, %edx
; X32-NEXT:    sarl $31, %edx
; X32-NEXT:    cmpl %eax, %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    sbbl %edx, %eax
; X32-NEXT:    setl %al
; X32-NEXT:    setl %dl
; X32-NEXT:    movzbl %dl, %ebp
; X32-NEXT:    negl %ebp
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne .LBB3_3
; X32-NEXT:  # %bb.2: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %ch, %cl
; X32-NEXT:  .LBB3_3: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %cl, (%ebx)
; X32-NEXT:    movl (%edi), %edx
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne .LBB3_5
; X32-NEXT:  # %bb.4: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movl %edx, %ebp
; X32-NEXT:    jmp .LBB3_5
;
; X64-LABEL: PR37100:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq %rdx, %r10
; X64-NEXT:    jmp .LBB3_1
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB3_5: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    idivl %esi
; X64-NEXT:  .LBB3_1: # %bb1
; X64-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-NEXT:    movsbq %dil, %rax
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    cmpq %rax, %r10
; X64-NEXT:    setl %sil
; X64-NEXT:    negl %esi
; X64-NEXT:    cmpq %rax, %r10
; X64-NEXT:    jl .LBB3_3
; X64-NEXT:  # %bb.2: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    movl %ecx, %edi
; X64-NEXT:  .LBB3_3: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    movb %dil, (%r8)
; X64-NEXT:    jl .LBB3_5
; X64-NEXT:  # %bb.4: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    movl (%r9), %esi
; X64-NEXT:    jmp .LBB3_5
bb:
  br label %bb1

bb1:
  %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
  %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
  %tmp3 = icmp sgt i16 %tmp2, 7
  %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
  %tmp5 = sext i8 %tmp to i64
  %tmp6 = icmp slt i64 %arg3, %tmp5
  %tmp7 = sext i1 %tmp6 to i32
  %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
  store volatile i8 %tmp8, i8* %ptr1
  %tmp9 = load volatile i32, i32* %ptr2
  %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
  %tmp11 = srem i32 0, %tmp10
  %tmp12 = trunc i32 %tmp11 to i16
  br label %bb1
}

; Use a particular instruction pattern in order to lower to the post-RA pseudo
; used to lower SETB into an SBB pattern in order to make sure that kind of
; usage of a copied EFLAGS continues to work.
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
; X32-LABEL: PR37431:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %esi, -8
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %eax
; X32-NEXT:    movl %eax, %ecx
; X32-NEXT:    sarl $31, %ecx
; X32-NEXT:    cmpl %eax, %eax
; X32-NEXT:    sbbl %ecx, %eax
; X32-NEXT:    setb %al
; X32-NEXT:    sbbb %cl, %cl
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movb %cl, (%edx)
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    xorl %ecx, %ecx
; X32-NEXT:    subl %eax, %ecx
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    idivl %ecx
; X32-NEXT:    movb %dl, (%esi)
; X32-NEXT:    popl %esi
; X32-NEXT:    .cfi_def_cfa_offset 4
; X32-NEXT:    retl
;
; X64-LABEL: PR37431:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %rcx
; X64-NEXT:    movslq (%rdi), %rax
; X64-NEXT:    cmpq %rax, %rax
; X64-NEXT:    sbbb %dl, %dl
; X64-NEXT:    cmpq %rax, %rax
; X64-NEXT:    movb %dl, (%rsi)
; X64-NEXT:    sbbl %esi, %esi
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    idivl %esi
; X64-NEXT:    movb %dl, (%rcx)
; X64-NEXT:    retq
entry:
  %tmp = load i32, i32* %arg1
  %tmp1 = sext i32 %tmp to i64
  %tmp2 = icmp ugt i64 %tmp1, undef
  %tmp3 = zext i1 %tmp2 to i8
  %tmp4 = sub i8 0, %tmp3
  store i8 %tmp4, i8* %arg2
  %tmp5 = sext i8 %tmp4 to i32
  %tmp6 = srem i32 0, %tmp5
  %tmp7 = trunc i32 %tmp6 to i8
  store i8 %tmp7, i8* %arg3
  ret void
}