; This tests the optimization where producers and consumers of i1 (bool)
; variables are combined to implicitly use flags instead of explicitly using
; stack or register variables.

; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \
; RUN:   -allow-externally-defined-symbols | FileCheck %s

; RUN: %if --need=target_ARM32 --command %p2i --filetype=obj \
; RUN:   --target arm32 -i %s --disassemble --args -O2 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   | %if --need=target_ARM32 --command FileCheck %s \
; RUN:   --check-prefix=ARM32

; External helper. Calls to it are used below as the instruction that sits
; between a compare and the consumer of its i1 result.
declare void @use_value(i32)

; Basic cmp/branch folding.
define internal i32 @fold_cmp_br(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; CHECK-LABEL: fold_cmp_br
; CHECK: cmp
; CHECK: jge
; ARM32-LABEL: fold_cmp_br
; ARM32: cmp r0, r1
; ARM32: bge
; ARM32: mov r0, #1
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr

; Cmp/branch folding with intervening instructions.
define internal i32 @fold_cmp_br_intervening_insts(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  ; The call separates the compare from the branch that consumes it.
  call void @use_value(i32 %arg1)
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; CHECK-LABEL: fold_cmp_br_intervening_insts
; CHECK-NOT: cmp
; CHECK: call
; CHECK: cmp
; CHECK: jge
; ARM32-LABEL: fold_cmp_br_intervening_insts
; ARM32: push {{[{].*[}]}}
; ARM32: bl{{.*}}use_value
; ARM32: cmp {{r[0-9]+}}, {{r[0-9]+}}
; ARM32: bge
; ARM32: mov r0, #1
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr

; Cmp/branch non-folding because of live-out.
define internal i32 @no_fold_cmp_br_liveout(i32 %arg1, i32 %arg2) {
entry:
  ; %cmp1 is produced here but consumed in %next, i.e. in a different block.
  %cmp1 = icmp slt i32 %arg1, %arg2
  br label %next
next:
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; CHECK-LABEL: no_fold_cmp_br_liveout
; CHECK: cmp
; CHECK: set
; CHECK: cmp
; CHECK: je
; ARM32-LABEL: no_fold_cmp_br_liveout
; ARM32: cmp
; ARM32: movlt [[REG:r[0-9]+]]
; ARM32: tst [[REG]], #1
; ARM32: beq

; Cmp/branch non-folding because of extra non-whitelisted uses.
define internal i32 @no_fold_cmp_br_non_whitelist(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  ; The zext is the extra consumer of %cmp1 in addition to the branch.
  %result = zext i1 %cmp1 to i32
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 %result
branch2:
  ret i32 2
}

; CHECK-LABEL: no_fold_cmp_br_non_whitelist
; CHECK: cmp
; CHECK: set
; CHECK: movzx
; CHECK: cmp
; CHECK: je
; ARM32-LABEL: no_fold_cmp_br_non_whitelist
; ARM32: mov [[R:r[0-9]+]], #0
; ARM32: cmp r0, r1
; ARM32: movlt [[R]], #1
; ARM32: tst [[R]], #1
; ARM32: beq
; ARM32: bx lr
; ARM32: mov r0, #2
; ARM32: bx lr

; Basic cmp/select folding.
define internal i32 @fold_cmp_select(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %result = select i1 %cmp1, i32 %arg1, i32 %arg2
  ret i32 %result
}

; CHECK-LABEL: fold_cmp_select
; CHECK: cmp
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select
; ARM32: cmp r0, r1
; ARM32: movlt {{r[0-9]+}}, r0

; 64-bit cmp/select folding.
define internal i64 @fold_cmp_select_64(i64 %arg1, i64 %arg2) {
entry:
  ; The compare is done on the truncated (32-bit) values; the select operates
  ; on the full 64-bit arguments.
  %arg1_trunc = trunc i64 %arg1 to i32
  %arg2_trunc = trunc i64 %arg2 to i32
  %cmp1 = icmp slt i32 %arg1_trunc, %arg2_trunc
  %result = select i1 %cmp1, i64 %arg1, i64 %arg2
  ret i64 %result
}

; CHECK-LABEL: fold_cmp_select_64
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_64
; ARM32: cmp r0, r2
; ARM32: movlt [[LOW:r[0-9]+]], r0
; ARM32: movlt [[HIGH:r[0-9]+]], r1
; ARM32: mov r0, [[LOW]]
; ARM32: mov r1, [[HIGH]]
; ARM32: bx lr

; 64-bit cmp/select folding with undef operands.
define internal i64 @fold_cmp_select_64_undef(i64 %arg1) {
entry:
  %arg1_trunc = trunc i64 %arg1 to i32
  %cmp1 = icmp slt i32 undef, %arg1_trunc
  %result = select i1 %cmp1, i64 %arg1, i64 undef
  ret i64 %result
}

; CHECK-LABEL: fold_cmp_select_64_undef
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_64_undef
; ARM32: mov
; ARM32: rsbs r{{[0-9]+}}, r{{[0-9]+}}, #0
; ARM32: movlt
; ARM32: movlt
; ARM32: bx lr

; Cmp/select folding with intervening instructions.
define internal i32 @fold_cmp_select_intervening_insts(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  ; The call separates the compare from the select that consumes it.
  call void @use_value(i32 %arg1)
  %result = select i1 %cmp1, i32 %arg1, i32 %arg2
  ret i32 %result
}

; CHECK-LABEL: fold_cmp_select_intervening_insts
; CHECK-NOT: cmp
; CHECK: call
; CHECK: cmp
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_intervening_insts
; ARM32: bl{{.*}}use_value
; ARM32: cmp r{{[0-9]+}}, r{{[0-9]+}}
; ARM32: movlt
; ARM32: bx lr

; Cmp/multi-select folding.
define internal i32 @fold_cmp_select_multi(i32 %arg1, i32 %arg2) {
entry:
  ; One compare feeds three selects, all in the same block.
  %cmp1 = icmp slt i32 %arg1, %arg2
  %a = select i1 %cmp1, i32 %arg1, i32 %arg2
  %b = select i1 %cmp1, i32 %arg2, i32 %arg1
  %c = select i1 %cmp1, i32 123, i32 %arg1
  %partial = add i32 %a, %b
  %result = add i32 %partial, %c
  ret i32 %result
}

; CHECK-LABEL: fold_cmp_select_multi
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmp
; CHECK: cmovl
; CHECK: cmp
; CHECK: cmovge
; CHECK: add
; CHECK: add
; ARM32-LABEL: fold_cmp_select_multi
; ARM32: mov
; ARM32: cmp
; ARM32: movlt {{.*}}, #1
; ARM32: mov
; ARM32: tst {{.*}}, #1
; ARM32: movne
; ARM32: mov
; ARM32: tst {{.*}}, #1
; ARM32: movne
; ARM32: tst {{.*}}, #1
; ARM32: movne {{.*}}, #123
; ARM32: bx lr

; Cmp/multi-select non-folding because of live-out.
define internal i32 @no_fold_cmp_select_multi_liveout(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %a = select i1 %cmp1, i32 %arg1, i32 %arg2
  %b = select i1 %cmp1, i32 %arg2, i32 %arg1
  br label %next
next:
  ; This third select consumes %cmp1 in a different block from its producer.
  %c = select i1 %cmp1, i32 123, i32 %arg1
  %partial = add i32 %a, %b
  %result = add i32 %partial, %c
  ret i32 %result
}

; CHECK-LABEL: no_fold_cmp_select_multi_liveout
; CHECK: set
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmove
; CHECK: add
; CHECK: add
; ARM32-LABEL: no_fold_cmp_select_multi_liveout
; ARM32: mov
; ARM32: cmp r0, r1
; ARM32: movlt
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: tst
; ARM32: movne
; ARM32: bx lr

; Cmp/branch non-folding due to load folding and intervening store.
define internal i32 @no_fold_cmp_br_store(i32 %arg2, i32 %argaddr) {
entry:
  %addr = inttoptr i32 %argaddr to i32*
  %arg1 = load i32, i32* %addr, align 1
  %cmp1 = icmp slt i32 %arg1, %arg2
  ; The store to the loaded address sits between the compare and the branch.
  store i32 1, i32* %addr, align 1
  br i1 %cmp1, label %branch1, label %branch2
branch1:
  ret i32 1
branch2:
  ret i32 2
}

; CHECK-LABEL: no_fold_cmp_br_store
; CHECK: cmp
; CHECK: set
; CHECK: cmp

; Cmp/select non-folding due to load folding and intervening store.
define internal i32 @no_fold_cmp_select_store(i32 %arg1, i32 %argaddr) {
entry:
  %addr = inttoptr i32 %argaddr to i32*
  %arg2 = load i32, i32* %addr, align 1
  %cmp1 = icmp slt i32 %arg1, %arg2
  ; The store to the loaded address sits between the compare and the select.
  store i32 1, i32* %addr, align 1
  %result = select i1 %cmp1, i32 %arg1, i32 %argaddr
  ret i32 %result
}

; CHECK-LABEL: no_fold_cmp_select_store
; CHECK: cmp
; CHECK: setl
; CHECK: mov DWORD PTR
; CHECK: cmp
; CHECK: cmovne

; Cmp/select folding due to load folding and non-intervening store.
define internal i32 @fold_cmp_select_store(i32 %arg1, i32 %argaddr) {
entry:
  %addr = inttoptr i32 %argaddr to i32*
  %arg2 = load i32, i32* %addr, align 1
  %cmp1 = icmp slt i32 %arg1, %arg2
  %result = select i1 %cmp1, i32 %arg1, i32 %argaddr
  ; Here the store comes after the select, not between compare and select.
  store i32 1, i32* %addr, align 1
  ret i32 %result
}

; CHECK-LABEL: fold_cmp_select_store
; CHECK: cmp {{.*}},DWORD PTR
; CHECK: cmovl

; Cmp/multi-select non-folding because of extra non-whitelisted uses.
define internal i32 @no_fold_cmp_select_multi_non_whitelist(i32 %arg1, i32 %arg2) {
entry:
  %cmp1 = icmp slt i32 %arg1, %arg2
  %a = select i1 %cmp1, i32 %arg1, i32 %arg2
  %b = select i1 %cmp1, i32 %arg2, i32 %arg1
  %c = select i1 %cmp1, i32 123, i32 %arg1
  ; The zext is the extra consumer of %cmp1 in addition to the selects.
  %ext = zext i1 %cmp1 to i32
  %partial1 = add i32 %a, %b
  %partial2 = add i32 %partial1, %c
  %result = add i32 %partial2, %ext
  ret i32 %result
}

; CHECK-LABEL: no_fold_cmp_select_multi_non_whitelist
; CHECK: set
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmovne
; CHECK: cmp
; CHECK: cmove
; CHECK: movzx
; CHECK: add
; CHECK: add
; CHECK: add
; ARM32-LABEL: no_fold_cmp_select_multi_non_whitelist
; ARM32: mov
; ARM32: cmp r0, r1
; ARM32: movlt
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: mov
; ARM32: tst
; ARM32: movne
; ARM32: tst
; ARM32: movne
; ARM32: bx lr

; Branches on and/or combinations of i1 values truncated from i32 arguments.
; Only ARM32 expectations are given for these functions: one tst/branch pair
; per truncated argument.

; Branch on (t0 and t1).
define internal i32 @br_i1_folding2_and(i32 %arg1, i32 %arg2) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1

  %t2 = and i1 %t0, %t1

  br i1 %t2, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}

; ARM32-LABEL: br_i1_folding2_and
; ARM32: tst r0, #1
; ARM32: beq
; ARM32: tst r1, #1
; ARM32: beq

; Branch on (t0 or t1).
define internal i32 @br_i1_folding2_or(i32 %arg1, i32 %arg2) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1

  %t2 = or i1 %t0, %t1

  br i1 %t2, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}

; ARM32-LABEL: br_i1_folding2_or
; ARM32: tst r0, #1
; ARM32: bne
; ARM32: tst r1, #1
; ARM32: beq

; Branch on ((t0 and t1) or t2).
define internal i32 @br_i1_folding3_and_or(i32 %arg1, i32 %arg2, i32 %arg3) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1
  %t2 = trunc i32 %arg3 to i1

  %t3 = and i1 %t0, %t1
  %t4 = or i1 %t3, %t2

  br i1 %t4, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}

; ARM32-LABEL: br_i1_folding3_and_or
; ARM32: tst r0, #1
; ARM32: beq
; ARM32: tst r1, #1
; ARM32: bne
; ARM32: tst r2, #1
; ARM32: beq

; Branch on ((t0 or t1) and t2).
define internal i32 @br_i1_folding3_or_and(i32 %arg1, i32 %arg2, i32 %arg3) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1
  %t2 = trunc i32 %arg3 to i1

  %t3 = or i1 %t0, %t1
  %t4 = and i1 %t3, %t2

  br i1 %t4, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}

; ARM32-LABEL: br_i1_folding3_or_and
; ARM32: tst r0, #1
; ARM32: bne
; ARM32: tst r1, #1
; ARM32: beq
; ARM32: tst r2, #1
; ARM32: beq

; Branch on (((t0 or t1) and t2) or (t3 and t4)).
define internal i32 @br_i1_folding4(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
                                    i32 %arg5) {
  %t0 = trunc i32 %arg1 to i1
  %t1 = trunc i32 %arg2 to i1
  %t2 = trunc i32 %arg3 to i1
  %t3 = trunc i32 %arg4 to i1
  %t4 = trunc i32 %arg5 to i1

  %t5 = or i1 %t0, %t1
  %t6 = and i1 %t5, %t2
  %t7 = and i1 %t3, %t4
  %t8 = or i1 %t6, %t7

  br i1 %t8, label %target_true, label %target_false

target_true:
  ret i32 1

target_false:
  ret i32 0
}

; ARM32-LABEL: br_i1_folding4
; ARM32: tst r0, #1
; ARM32: bne
; ARM32: tst r1, #1
; ARM32: beq
; ARM32: tst r2, #1
; ARM32: bne
; ARM32: tst r3, #1
; ARM32: beq [[TARGET:.*]]
; ARM32: tst r4, #1
; ARM32: beq [[TARGET]]