; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64

; The peephole optimizer can elide some physical register copies such as
; EFLAGS. Make sure the flags are used directly, instead of needlessly using
; saving and restoring specific conditions.

@L = external global i32
@M = external global i8

declare i32 @bar(i64)

; RMW increment of @L sets ZF; the branch must consume EFLAGS directly.
define i1 @plus_one() nounwind {
; CHECK32-LABEL: plus_one:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb M, %al
; CHECK32-NEXT:    incl L
; CHECK32-NEXT:    jne .LBB0_2
; CHECK32-NEXT:  # %bb.1: # %entry
; CHECK32-NEXT:    andb $8, %al
; CHECK32-NEXT:    je .LBB0_2
; CHECK32-NEXT:  # %bb.3: # %exit2
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:    retl
; CHECK32-NEXT:  .LBB0_2: # %exit
; CHECK32-NEXT:    movb $1, %al
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: plus_one:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movb {{.*}}(%rip), %al
; CHECK64-NEXT:    incl {{.*}}(%rip)
; CHECK64-NEXT:    jne .LBB0_2
; CHECK64-NEXT:  # %bb.1: # %entry
; CHECK64-NEXT:    andb $8, %al
; CHECK64-NEXT:    je .LBB0_2
; CHECK64-NEXT:  # %bb.3: # %exit2
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB0_2: # %exit
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
entry:
  %loaded_L = load i32, i32* @L
  %val = add nsw i32 %loaded_L, 1 ; N.B. will emit inc.
  store i32 %val, i32* @L
  %loaded_M = load i8, i8* @M
  %masked = and i8 %loaded_M, 8
  %M_is_true = icmp ne i8 %masked, 0
  %L_is_false = icmp eq i32 %val, 0
  %cond = and i1 %L_is_false, %M_is_true
  br i1 %cond, label %exit2, label %exit

exit:
  ret i1 true

exit2:
  ret i1 false
}

; Same pattern with a non-unit constant: add-to-memory still sets ZF.
define i1 @plus_forty_two() nounwind {
; CHECK32-LABEL: plus_forty_two:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb M, %al
; CHECK32-NEXT:    addl $42, L
; CHECK32-NEXT:    jne .LBB1_2
; CHECK32-NEXT:  # %bb.1: # %entry
; CHECK32-NEXT:    andb $8, %al
; CHECK32-NEXT:    je .LBB1_2
; CHECK32-NEXT:  # %bb.3: # %exit2
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:    retl
; CHECK32-NEXT:  .LBB1_2: # %exit
; CHECK32-NEXT:    movb $1, %al
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: plus_forty_two:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movb {{.*}}(%rip), %al
; CHECK64-NEXT:    addl $42, {{.*}}(%rip)
; CHECK64-NEXT:    jne .LBB1_2
; CHECK64-NEXT:  # %bb.1: # %entry
; CHECK64-NEXT:    andb $8, %al
; CHECK64-NEXT:    je .LBB1_2
; CHECK64-NEXT:  # %bb.3: # %exit2
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB1_2: # %exit
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
entry:
  %loaded_L = load i32, i32* @L
  %val = add nsw i32 %loaded_L, 42 ; N.B. won't emit inc.
  store i32 %val, i32* @L
  %loaded_M = load i8, i8* @M
  %masked = and i8 %loaded_M, 8
  %M_is_true = icmp ne i8 %masked, 0
  %L_is_false = icmp eq i32 %val, 0
  %cond = and i1 %L_is_false, %M_is_true
  br i1 %cond, label %exit2, label %exit

exit:
  ret i1 true

exit2:
  ret i1 false
}

; RMW decrement of @L; expect dec-to-memory feeding the branch.
define i1 @minus_one() nounwind {
; CHECK32-LABEL: minus_one:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb M, %al
; CHECK32-NEXT:    decl L
; CHECK32-NEXT:    jne .LBB2_2
; CHECK32-NEXT:  # %bb.1: # %entry
; CHECK32-NEXT:    andb $8, %al
; CHECK32-NEXT:    je .LBB2_2
; CHECK32-NEXT:  # %bb.3: # %exit2
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:    retl
; CHECK32-NEXT:  .LBB2_2: # %exit
; CHECK32-NEXT:    movb $1, %al
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: minus_one:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movb {{.*}}(%rip), %al
; CHECK64-NEXT:    decl {{.*}}(%rip)
; CHECK64-NEXT:    jne .LBB2_2
; CHECK64-NEXT:  # %bb.1: # %entry
; CHECK64-NEXT:    andb $8, %al
; CHECK64-NEXT:    je .LBB2_2
; CHECK64-NEXT:  # %bb.3: # %exit2
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB2_2: # %exit
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
entry:
  %loaded_L = load i32, i32* @L
  %val = add nsw i32 %loaded_L, -1 ; N.B. will emit dec.
  store i32 %val, i32* @L
  %loaded_M = load i8, i8* @M
  %masked = and i8 %loaded_M, 8
  %M_is_true = icmp ne i8 %masked, 0
  %L_is_false = icmp eq i32 %val, 0
  %cond = and i1 %L_is_false, %M_is_true
  br i1 %cond, label %exit2, label %exit

exit:
  ret i1 true

exit2:
  ret i1 false
}

; Non-unit negative constant: add of -42 to memory still sets ZF.
define i1 @minus_forty_two() nounwind {
; CHECK32-LABEL: minus_forty_two:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb M, %al
; CHECK32-NEXT:    addl $-42, L
; CHECK32-NEXT:    jne .LBB3_2
; CHECK32-NEXT:  # %bb.1: # %entry
; CHECK32-NEXT:    andb $8, %al
; CHECK32-NEXT:    je .LBB3_2
; CHECK32-NEXT:  # %bb.3: # %exit2
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:    retl
; CHECK32-NEXT:  .LBB3_2: # %exit
; CHECK32-NEXT:    movb $1, %al
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: minus_forty_two:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movb {{.*}}(%rip), %al
; CHECK64-NEXT:    addl $-42, {{.*}}(%rip)
; CHECK64-NEXT:    jne .LBB3_2
; CHECK64-NEXT:  # %bb.1: # %entry
; CHECK64-NEXT:    andb $8, %al
; CHECK64-NEXT:    je .LBB3_2
; CHECK64-NEXT:  # %bb.3: # %exit2
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB3_2: # %exit
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
entry:
  %loaded_L = load i32, i32* @L
  %val = add nsw i32 %loaded_L, -42 ; N.B. won't emit dec.
  store i32 %val, i32* @L
  %loaded_M = load i8, i8* @M
  %masked = and i8 %loaded_M, 8
  %M_is_true = icmp ne i8 %masked, 0
  %L_is_false = icmp eq i32 %val, 0
  %cond = and i1 %L_is_false, %M_is_true
  br i1 %cond, label %exit2, label %exit

exit:
  ret i1 true

exit2:
  ret i1 false
}

; A call between the flag-setting cmpxchg and the branch forces the
; cmpxchg result to be materialized (setne) and re-tested after the call.
define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
; CHECK32-LABEL: test_intervening_call:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    pushl %ebx
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    lock cmpxchg8b (%esi)
; CHECK32-NEXT:    setne %bl
; CHECK32-NEXT:    subl $8, %esp
; CHECK32-NEXT:    pushl %edx
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    calll bar
; CHECK32-NEXT:    addl $16, %esp
; CHECK32-NEXT:    testb %bl, %bl
; CHECK32-NEXT:    jne .LBB4_3
; CHECK32-NEXT:  # %bb.1: # %t
; CHECK32-NEXT:    movl $42, %eax
; CHECK32-NEXT:    jmp .LBB4_2
; CHECK32-NEXT:  .LBB4_3: # %f
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:  .LBB4_2: # %t
; CHECK32-NEXT:    xorl %edx, %edx
; CHECK32-NEXT:    addl $4, %esp
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    popl %ebx
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: test_intervening_call:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    pushq %rbx
; CHECK64-NEXT:    movq %rsi, %rax
; CHECK64-NEXT:    lock cmpxchgq %rdx, (%rdi)
; CHECK64-NEXT:    setne %bl
; CHECK64-NEXT:    movq %rax, %rdi
; CHECK64-NEXT:    callq bar
; CHECK64-NEXT:    testb %bl, %bl
; CHECK64-NEXT:    jne .LBB4_2
; CHECK64-NEXT:  # %bb.1: # %t
; CHECK64-NEXT:    movl $42, %eax
; CHECK64-NEXT:    popq %rbx
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB4_2: # %f
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    popq %rbx
; CHECK64-NEXT:    retq
entry:
  ; cmpxchg sets EFLAGS, call clobbers it, then br uses EFLAGS.
  %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
  %v = extractvalue { i64, i1 } %cx, 0
  %p = extractvalue { i64, i1 } %cx, 1
  call i32 @bar(i64 %v)
  br i1 %p, label %t, label %f

t:
  ret i64 42

f:
  ret i64 0
}

; Two cmpxchg results live at once: the first success bit must be saved
; (setne/spill) because the second cmpxchg clobbers EFLAGS.
define i64 @test_two_live_flags(i64* %foo0, i64 %bar0, i64 %baz0, i64* %foo1, i64 %bar1, i64 %baz1) nounwind {
; CHECK32-LABEL: test_two_live_flags:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    pushl %ebp
; CHECK32-NEXT:    pushl %ebx
; CHECK32-NEXT:    pushl %edi
; CHECK32-NEXT:    pushl %esi
; CHECK32-NEXT:    pushl %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    lock cmpxchg8b (%esi)
; CHECK32-NEXT:    setne {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movl %edi, %edx
; CHECK32-NEXT:    movl %ebp, %ecx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT:    lock cmpxchg8b (%esi)
; CHECK32-NEXT:    sete %al
; CHECK32-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
; CHECK32-NEXT:    jne .LBB5_4
; CHECK32-NEXT:  # %bb.1: # %entry
; CHECK32-NEXT:    testb %al, %al
; CHECK32-NEXT:    je .LBB5_4
; CHECK32-NEXT:  # %bb.2: # %t
; CHECK32-NEXT:    movl $42, %eax
; CHECK32-NEXT:    jmp .LBB5_3
; CHECK32-NEXT:  .LBB5_4: # %f
; CHECK32-NEXT:    xorl %eax, %eax
; CHECK32-NEXT:  .LBB5_3: # %t
; CHECK32-NEXT:    xorl %edx, %edx
; CHECK32-NEXT:    addl $4, %esp
; CHECK32-NEXT:    popl %esi
; CHECK32-NEXT:    popl %edi
; CHECK32-NEXT:    popl %ebx
; CHECK32-NEXT:    popl %ebp
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: test_two_live_flags:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movq %rsi, %rax
; CHECK64-NEXT:    lock cmpxchgq %rdx, (%rdi)
; CHECK64-NEXT:    setne %dl
; CHECK64-NEXT:    movq %r8, %rax
; CHECK64-NEXT:    lock cmpxchgq %r9, (%rcx)
; CHECK64-NEXT:    sete %al
; CHECK64-NEXT:    testb %dl, %dl
; CHECK64-NEXT:    jne .LBB5_3
; CHECK64-NEXT:  # %bb.1: # %entry
; CHECK64-NEXT:    testb %al, %al
; CHECK64-NEXT:    je .LBB5_3
; CHECK64-NEXT:  # %bb.2: # %t
; CHECK64-NEXT:    movl $42, %eax
; CHECK64-NEXT:    retq
; CHECK64-NEXT:  .LBB5_3: # %f
; CHECK64-NEXT:    xorl %eax, %eax
; CHECK64-NEXT:    retq
entry:
  %cx0 = cmpxchg i64* %foo0, i64 %bar0, i64 %baz0 seq_cst seq_cst
  %p0 = extractvalue { i64, i1 } %cx0, 1
  %cx1 = cmpxchg i64* %foo1, i64 %bar1, i64 %baz1 seq_cst seq_cst
  %p1 = extractvalue { i64, i1 } %cx1, 1
  %flag = and i1 %p0, %p1
  br i1 %flag, label %t, label %f

t:
  ret i64 42

f:
  ret i64 0
}

; Inline asm with a ~{cc} clobber: the compare result must be
; materialized (setg) before the asm, not re-read from EFLAGS after it.
define i1 @asm_clobbering_flags(i32* %mem) nounwind {
; CHECK32-LABEL: asm_clobbering_flags:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    movl (%ecx), %edx
; CHECK32-NEXT:    testl %edx, %edx
; CHECK32-NEXT:    setg %al
; CHECK32-NEXT:    #APP
; CHECK32-NEXT:    bsfl %edx, %edx
; CHECK32-NEXT:    #NO_APP
; CHECK32-NEXT:    movl %edx, (%ecx)
; CHECK32-NEXT:    retl
;
; CHECK64-LABEL: asm_clobbering_flags:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movl (%rdi), %ecx
; CHECK64-NEXT:    testl %ecx, %ecx
; CHECK64-NEXT:    setg %al
; CHECK64-NEXT:    #APP
; CHECK64-NEXT:    bsfl %ecx, %ecx
; CHECK64-NEXT:    #NO_APP
; CHECK64-NEXT:    movl %ecx, (%rdi)
; CHECK64-NEXT:    retq
entry:
  %val = load i32, i32* %mem, align 4
  %cmp = icmp sgt i32 %val, 0
  %res = tail call i32 asm "bsfl $1,$0", "=r,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i32 %val) 
  store i32 %res, i32* %mem, align 4
  ret i1 %cmp
}