; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s

; fold (shl (zext (lshr (A, X))), X) -> (zext (shl (lshr (A, X)), X))

; Canonicalize the sequence shl/zext/lshr so that the zero-extend is
; performed as the last instruction of the sequence.
; This helps DAGCombiner to identify and then fold the sequence
; of shifts into a single AND.
; The transformation is profitable if the shift amounts are the same
; and if there is only one use of the zext.

; i8 -> i16: the expected code is a single AND with -16.
define i16 @fun1(i8 zeroext %v) {
; CHECK-LABEL: fun1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
entry:
  %hi = lshr i8 %v, 4
  %wide = zext i8 %hi to i16
  %res = shl i16 %wide, 4
  ret i16 %res
}

; i8 -> i32: the expected code is a single AND with -16.
define i32 @fun2(i8 zeroext %v) {
; CHECK-LABEL: fun2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    retq
entry:
  %hi = lshr i8 %v, 4
  %wide = zext i8 %hi to i32
  %res = shl i32 %wide, 4
  ret i32 %res
}

; i16 -> i32: the expected code is a single AND with -16.
define i32 @fun3(i16 zeroext %v) {
; CHECK-LABEL: fun3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    retq
entry:
  %hi = lshr i16 %v, 4
  %wide = zext i16 %hi to i32
  %res = shl i32 %wide, 4
  ret i32 %res
}

; i8 -> i64: the expected code is a single AND with -16.
define i64 @fun4(i8 zeroext %v) {
; CHECK-LABEL: fun4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    retq
entry:
  %hi = lshr i8 %v, 4
  %wide = zext i8 %hi to i64
  %res = shl i64 %wide, 4
  ret i64 %res
}

; i16 -> i64: the expected code is a single AND with -16.
define i64 @fun5(i16 zeroext %v) {
; CHECK-LABEL: fun5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    retq
entry:
  %hi = lshr i16 %v, 4
  %wide = zext i16 %hi to i64
  %res = shl i64 %wide, 4
  ret i64 %res
}

; i32 -> i64: the expected code is a single AND with -16.
define i64 @fun6(i32 zeroext %v) {
; CHECK-LABEL: fun6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    retq
entry:
  %hi = lshr i32 %v, 4
  %wide = zext i32 %hi to i64
  %res = shl i64 %wide, 4
  ret i64 %res
}

; Don't fold the pattern if we use arithmetic shifts.

; ashr on i8: the expected code keeps the shifts as separate instructions.
define i64 @fun7(i8 zeroext %v) {
; CHECK-LABEL: fun7:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sarb $4, %dil
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    shlq $4, %rax
; CHECK-NEXT:    retq
entry:
  %sar = ashr i8 %v, 4
  %wide = zext i8 %sar to i64
  %res = shl i64 %wide, 4
  ret i64 %res
}

; ashr on i16: the expected code keeps the shifts as separate instructions.
define i64 @fun8(i16 zeroext %v) {
; CHECK-LABEL: fun8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movswl %di, %eax
; CHECK-NEXT:    shrl $4, %eax
; CHECK-NEXT:    movzwl %ax, %eax
; CHECK-NEXT:    shlq $4, %rax
; CHECK-NEXT:    retq
entry:
  %sar = ashr i16 %v, 4
  %wide = zext i16 %sar to i64
  %res = shl i64 %wide, 4
  ret i64 %res
}

; ashr on i32: the expected code keeps the shifts as separate instructions.
define i64 @fun9(i32 zeroext %v) {
; CHECK-LABEL: fun9:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    sarl $4, %eax
; CHECK-NEXT:    shlq $4, %rax
; CHECK-NEXT:    retq
entry:
  %sar = ashr i32 %v, 4
  %wide = zext i32 %sar to i64
  %res = shl i64 %wide, 4
  ret i64 %res
}

; Don't fold the pattern if there is more than one use of the
; operand in input to the shift left.

; i8 source: the zext result feeds both the shl and the add (two uses).
define i64 @fun10(i8 zeroext %v) {
; CHECK-LABEL: fun10:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    shrb $4, %dil
; CHECK-NEXT:    movzbl %dil, %ecx
; CHECK-NEXT:    movq %rcx, %rax
; CHECK-NEXT:    shlq $4, %rax
; CHECK-NEXT:    orq %rcx, %rax
; CHECK-NEXT:    retq
entry:
  %hi = lshr i8 %v, 4
  %wide = zext i8 %hi to i64
  %scaled = shl i64 %wide, 4
  %sum = add i64 %scaled, %wide
  ret i64 %sum
}

; i16 source: the zext result feeds both the shl and the add (two uses).
define i64 @fun11(i16 zeroext %v) {
; CHECK-LABEL: fun11:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    shrl $4, %edi
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shlq $4, %rax
; CHECK-NEXT:    addq %rdi, %rax
; CHECK-NEXT:    retq
entry:
  %hi = lshr i16 %v, 4
  %wide = zext i16 %hi to i64
  %scaled = shl i64 %wide, 4
  %sum = add i64 %scaled, %wide
  ret i64 %sum
}

; i32 source: the zext result feeds both the shl and the add (two uses).
define i64 @fun12(i32 zeroext %v) {
; CHECK-LABEL: fun12:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    shrl $4, %edi
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shlq $4, %rax
; CHECK-NEXT:    addq %rdi, %rax
; CHECK-NEXT:    retq
entry:
  %hi = lshr i32 %v, 4
  %wide = zext i32 %hi to i64
  %scaled = shl i64 %wide, 4
  %sum = add i64 %scaled, %wide
  ret i64 %sum
}

; PR17380
; Make sure that the combined dags are legal if we run the DAGCombiner after
; Legalization took place. The add instruction is redundant and increases by
; one the number of uses of the zext. This prevents the transformation from
; firing before dags are legalized and optimized.
; Once the add is removed, the number of uses becomes one and therefore the
; dags are canonicalized. After Legalization, we need to make sure that the
; valuetype for the shift count is legal.
; Verify also that we correctly fold the shl-shr sequence into an
; AND with bitmask.

define void @g(i32 %a) {
; CHECK-LABEL: g:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    andl $-4, %edi
; CHECK-NEXT:    jmp f # TAILCALL
  %shr = lshr i32 %a, 2
  %ext = zext i32 %shr to i64
  ; %dead is never read; it only gives the zext a second use.
  %dead = add i64 %ext, 1
  %shl = shl i64 %ext, 2
  tail call void @f(i64 %shl)
  ret void
}

; External callee that receives the shifted value from @g.
declare dso_local void @f(i64)
