# RUN: llc -mtriple=thumbv8a-unknown-linux-gnueabi %s -o - -run-pass=if-converter -debug-only=if-converter | FileCheck %s
# RUN: llc -mtriple=thumbv7-unknown-linux-gnueabi %s -o - -run-pass=if-converter -debug-only=if-converter 2>%t| FileCheck %s
# RUN: FileCheck %s < %t --check-prefix=DEBUG
# REQUIRES: asserts

# NOTE(review): only the thumbv7 invocation redirects stderr to %t, so the
# DEBUG-prefixed checks are evaluated against that invocation alone; the
# thumbv8a invocation only has its stdout checked. Confirm this is intended.

# When optimising for size, we use a different set of heuristics for
# if-conversion, which take into account the size of the instructions, not the
# time taken to execute them. This is more complicated for Thumb, where it is
# also affected by selection of narrow branch instructions, insertion of IT
# instructions, and selection of the CB(N)Z instructions.

# The IR module below is a skeleton: every function carries the minsize
# attribute and consists solely of named blocks that each contain a single
# `unreachable`. The machine-level bodies live in the MIR documents that
# follow, whose block names (e.g. bb.1.if.then) reuse these IR block names.
# fn4 and fn7 additionally set "target-features"="-thumb-mode".

--- |

  define void @fn1() minsize {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if.end:
    unreachable
  }

  define void @fn2() minsize {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if.end:
    unreachable
  }

  define void @fn3() minsize {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if.end:
    unreachable
  }

  define void @fn4() minsize "target-features"="-thumb-mode" {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if.end:
    unreachable
  }

  define void @fn5() minsize {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if.end:
    unreachable
  }

  define void @fn6() minsize {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if2.then:
    unreachable
  if2.else:
    unreachable
  }

  define void @fn7() minsize "target-features"="-thumb-mode" {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if.end:
    unreachable
  }

  define void @fn8() minsize {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if.end:
    unreachable
  }

  define void @fn9() minsize {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  lab1:
    unreachable
  }
...
---
name: fn1
alignment: 1
tracksRegLiveness: true

# If-conversion is profitable here because it will remove two branches of 2
# bytes each (assuming they can become narrow branches later), and will only
# add 2 bytes with the IT instruction.
# CHECK-LABEL: name: fn1
# CHECK: t2CMPri
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRSHi12
# CHECK-NEXT: t2MOVi
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn1'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=4, ExtraPredicateBytes=2)

body: |
  bb.0.entry:
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: $r0, $r1, $r2, $r3

    t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.2, 11, killed $cpsr

  bb.1.if.then:
    successors: %bb.3(0x80000000)
    liveins: $r0, $r3

    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    t2B %bb.3, 14, $noreg

  bb.2.if.else:
    successors: %bb.3(0x80000000)
    liveins: $r1, $r3

    renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg

  bb.3.if.end:
    liveins: $r0, $r3

    renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
    t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

---
name: fn2
alignment: 1
tracksRegLiveness: true

# If-conversion is not profitable here, because the 5 conditional instructions
# would require 2 IT instructions.
# CHECK-LABEL: name: fn2
# CHECK: t2CMPri
# CHECK-NEXT: t2Bcc
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn2'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=4)

body: |
  bb.0.entry:
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: $r0, $r1, $r2, $r3

    t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.2, 11, killed $cpsr

  bb.1.if.then:
    successors: %bb.3(0x80000000)
    liveins: $r0, $r3

    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    t2B %bb.3, 14, $noreg

  bb.2.if.else:
    successors: %bb.3(0x80000000)
    liveins: $r1, $r3

    renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg

  bb.3.if.end:
    liveins: $r0, $r3

    renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
    t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

---
name: fn3
alignment: 1
tracksRegLiveness: true

# Here, the true and false blocks both end in a tBX_RET instruction. One of
# these will be removed, saving 2 bytes, and the remaining one isn't
# conditional, so doesn't push us over the limit of 4 instructions in an IT
# block.
# CHECK-LABEL: name: fn3
# CHECK: t2CMPri
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRSHi12
# CHECK-NEXT: tBX_RET
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn3'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2)

body: |
  bb.0.entry:
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: $r0, $r1, $r2, $r3

    t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.2, 11, killed $cpsr

  bb.1.if.then:
    liveins: $r0, $r3

    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

  bb.2.if.else:
    liveins: $r1, $r3

    renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

---
name: fn4
alignment: 1
tracksRegLiveness: true

# This is the same as fn2, but compiled for ARM, which doesn't need IT
# instructions, so if-conversion is profitable.
# CHECK-LABEL: name: fn4
# CHECK: CMPri
# CHECK-NEXT: LDRi12
# CHECK-NEXT: LDRi12
# CHECK-NEXT: LDRSH
# CHECK-NEXT: LDRi12
# CHECK-NEXT: LDRi12
# CHECK-NEXT: MOVi
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn4'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=8, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=0)

body: |
  bb.0.entry:
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: $r0, $r1, $r2, $r3

    CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
    Bcc %bb.2, 11, killed $cpsr

  bb.1.if.then:
    successors: %bb.3(0x80000000)
    liveins: $r0, $r3

    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    B %bb.3

  bb.2.if.else:
    successors: %bb.3(0x80000000)
    liveins: $r1, $r3

    renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg

  bb.3.if.end:
    liveins: $r0, $r3

    renamable $r1 = MOVi 0, 14, $noreg, $noreg
    STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
    BX_RET 14, $noreg, implicit $r0

---
name: fn5
alignment: 1
tracksRegLiveness: true

# Here, the compare and conditional branch can be turned into a CBZ, so we
# don't want to if-convert.
# CHECK-LABEL: name: fn5
# CHECK: t2CMPri
# CHECK: t2Bcc
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn5'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=0, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2)

body: |
  bb.0.entry:
    successors: %bb.1(0x30000000), %bb.2(0x50000000)
    liveins: $r0, $r1, $r2

    t2CMPri killed renamable $r2, 0, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.2, 1, killed $cpsr

  bb.1.if.then:
    liveins: $r0

    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

  bb.2.if.else:
    liveins: $r1

    renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

---
name: fn6
alignment: 1
tracksRegLiveness: true

# This is a forked-diamond pattern: we recognise that the conditional branches
# at the ends of the true and false blocks are the same, and can be shared.
# CHECK-LABEL: name: fn6
# CHECK: t2CMPri
# CHECK-NEXT: t2LDRSHi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2CMPri
# CHECK-NEXT: t2Bcc
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn6'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=12, NumPredicatedInstructions=4, ExtraPredicateBytes=2)

body: |
  bb.0.entry:
    successors: %bb.1(0x30000000), %bb.2(0x50000000)
    liveins: $r0, $r1, $r2, $r3

    t2CMPri killed renamable $r2, 4, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.2, 1, killed $cpsr

  bb.1.if.then:
    successors: %bb.3(0x30000000), %bb.4(0x50000000)
    liveins: $r0, $r1, $r3

    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.3.if2.then, 1, killed $cpsr
    t2B %bb.4.if2.else, 14, $noreg

  bb.2.if.else:
    successors: %bb.3(0x30000000), %bb.4(0x50000000)
    liveins: $r0, $r1, $r3

    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
    t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.3.if2.then, 1, killed $cpsr
    t2B %bb.4.if2.else, 14, $noreg

  bb.3.if2.then:
    liveins: $r0, $r1, $r3

    t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

  bb.4.if2.else:
    liveins: $r0

    tBX_RET 14, $noreg, implicit $r0

---
name: fn7
alignment: 1
tracksRegLiveness: true

# When compiling for ARM, it would be good for code size to generate very long
# runs of conditional instructions, but we put an (arbitrary) limit on this to
# avoid generating code which is very bad for performance, and only saves a few
# bytes of code size.
# Unlike the other functions in this file, no debug-output directive is
# attached to this one; only the surviving compare/branch pair is matched.
# CHECK-LABEL: name: fn7
# CHECK: CMPri
# CHECK-NEXT: Bcc

body: |
  bb.0.entry:
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: $r0, $r1, $r2, $r3

    CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
    Bcc %bb.2, 11, killed $cpsr

  bb.1.if.then:
    successors: %bb.3(0x80000000)
    liveins: $r0, $r3

    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    B %bb.3

  bb.2.if.else:
    successors: %bb.3(0x80000000)
    liveins: $r1, $r3

    renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg

  bb.3.if.end:
    liveins: $r0, $r3

    renamable $r1 = MOVi 0, 14, $noreg, $noreg
    STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
    BX_RET 14, $noreg, implicit $r0

---
name: fn8
alignment: 1
tracksRegLiveness: true

# The first t2LDRi12 instruction in each branch is the same, so one copy of it
# will be removed, and it doesn't need to be predicated, keeping us under the 4
# instruction IT block limit.
# CHECK-LABEL: name: fn8
# CHECK: t2CMPri
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRSHi12
# CHECK-NEXT: t2MOVi
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn8'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=4, NumPredicatedInstructions=4, ExtraPredicateBytes=2)

body: |
  bb.0.entry:
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: $r0, $r1, $r2, $r3

    t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.2, 11, killed $cpsr

  bb.1.if.then:
    successors: %bb.3(0x80000000)
    liveins: $r0, $r3

    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 4, 14, $noreg
    t2B %bb.3, 14, $noreg

  bb.2.if.else:
    successors: %bb.3(0x80000000)
    liveins: $r0, $r3

    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg

  bb.3.if.end:
    liveins: $r0, $r3

    renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
    t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

---
name: fn9
alignment: 2
tracksRegLiveness: true

# The INLINEASM_BR instructions aren't analyzable, but they are identical so we
# can still do diamond if-conversion. From a code-size POV, they are common
# instructions, so one will be removed, and they don't need an IT block slot.
# CHECK-LABEL: name: fn9
# CHECK: tCMPi8
# CHECK-NEXT: tLDRi
# CHECK-NEXT: tLDRi
# CHECK-NEXT: tLDRi
# CHECK-NEXT: t2LDRSHi12
# CHECK-NEXT: INLINEASM_BR
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn9'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=8, NumPredicatedInstructions=4, ExtraPredicateBytes=2)

body: |
  bb.0.entry:
    successors: %bb.1(0x30000000), %bb.3(0x50000000)
    liveins: $r0, $r1, $r2

    tCMPi8 renamable $r2, 42, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.3, 1, killed $cpsr

  bb.1.if.then:
    successors: %bb.5(0x7fffffff)
    liveins: $r0, $r2

    renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg
    INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1)
    tBX_RET 14, $noreg, implicit $r2

  bb.3.if.else:
    successors: %bb.5(0x7fffffff)
    liveins: $r1, $r2

    renamable $r0 = tLDRi killed renamable $r1, 0, 14, $noreg
    renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
    INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1)
    tBX_RET 14, $noreg, implicit $r2

  bb.5.lab1 (address-taken):
    liveins: $r0

    renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 5, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0
...