; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s

; fold (shl (zext (lshr (A, X))), X) -> (zext (shl (lshr (A, X)), X))

; Canonicalize the lshr/zext/shl sequence so that the zero-extend becomes
; the last instruction of the sequence. With the two shifts adjacent,
; DAGCombiner can identify them and fold them into a single AND.
; The transformation is profitable when both shift amounts are the same
; and the zext has only one use.

; i8 source, i16 result.
; CHECK-LABEL: @fun1
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret
define i16 @fun1(i8 zeroext %v) {
entry:
  %shifted = lshr i8 %v, 4
  %wide = zext i8 %shifted to i16
  %result = shl i16 %wide, 4
  ret i16 %result
}

; i8 source, i32 result.
; CHECK-LABEL: @fun2
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret
define i32 @fun2(i8 zeroext %v) {
entry:
  %shifted = lshr i8 %v, 4
  %wide = zext i8 %shifted to i32
  %result = shl i32 %wide, 4
  ret i32 %result
}

; i16 source, i32 result.
; CHECK-LABEL: @fun3
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret
define i32 @fun3(i16 zeroext %v) {
entry:
  %shifted = lshr i16 %v, 4
  %wide = zext i16 %shifted to i32
  %result = shl i32 %wide, 4
  ret i32 %result
}

; i8 source, i64 result.
; CHECK-LABEL: @fun4
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret
define i64 @fun4(i8 zeroext %v) {
entry:
  %shifted = lshr i8 %v, 4
  %wide = zext i8 %shifted to i64
  %result = shl i64 %wide, 4
  ret i64 %result
}

; i16 source, i64 result.
; CHECK-LABEL: @fun5
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret
define i64 @fun5(i16 zeroext %v) {
entry:
  %shifted = lshr i16 %v, 4
  %wide = zext i16 %shifted to i64
  %result = shl i64 %wide, 4
  ret i64 %result
}

; i32 source, i64 result.
; CHECK-LABEL: @fun6
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret
define i64 @fun6(i32 zeroext %v) {
entry:
  %shifted = lshr i32 %v, 4
  %wide = zext i32 %shifted to i64
  %result = shl i64 %wide, 4
  ret i64 %result
}

; Don't fold the pattern if we use arithmetic shifts.
; Arithmetic right shift of an i8, widened to i64: both shifts must remain.
; CHECK-LABEL: @fun7
; CHECK: sar
; CHECK: shl
; CHECK: ret
define i64 @fun7(i8 zeroext %v) {
entry:
  %shifted = ashr i8 %v, 4
  %wide = zext i8 %shifted to i64
  %result = shl i64 %wide, 4
  ret i64 %result
}

; Arithmetic right shift of an i16, widened to i64: both shifts must remain.
; CHECK-LABEL: @fun8
; CHECK: sar
; CHECK: shl
; CHECK: ret
define i64 @fun8(i16 zeroext %v) {
entry:
  %shifted = ashr i16 %v, 4
  %wide = zext i16 %shifted to i64
  %result = shl i64 %wide, 4
  ret i64 %result
}

; Arithmetic right shift of an i32, widened to i64: both shifts must remain.
; CHECK-LABEL: @fun9
; CHECK: sar
; CHECK: shl
; CHECK: ret
define i64 @fun9(i32 zeroext %v) {
entry:
  %shifted = ashr i32 %v, 4
  %wide = zext i32 %shifted to i64
  %result = shl i64 %wide, 4
  ret i64 %result
}

; Don't fold the pattern if there is more than one use of the
; operand in input to the shift left.

; i8 source; the zext feeds both the shl and the add.
; CHECK-LABEL: @fun10
; CHECK: shr
; CHECK: shl
; CHECK: ret
define i64 @fun10(i8 zeroext %v) {
entry:
  %shifted = lshr i8 %v, 4
  %wide = zext i8 %shifted to i64
  %scaled = shl i64 %wide, 4
  %sum = add i64 %scaled, %wide
  ret i64 %sum
}

; i16 source; the zext feeds both the shl and the add.
; CHECK-LABEL: @fun11
; CHECK: shr
; CHECK: shl
; CHECK: ret
define i64 @fun11(i16 zeroext %v) {
entry:
  %shifted = lshr i16 %v, 4
  %wide = zext i16 %shifted to i64
  %scaled = shl i64 %wide, 4
  %sum = add i64 %scaled, %wide
  ret i64 %sum
}

; i32 source; the zext feeds both the shl and the add.
; CHECK-LABEL: @fun12
; CHECK: shr
; CHECK: shl
; CHECK: ret
define i64 @fun12(i32 zeroext %v) {
entry:
  %shifted = lshr i32 %v, 4
  %wide = zext i32 %shifted to i64
  %scaled = shl i64 %wide, 4
  %sum = add i64 %scaled, %wide
  ret i64 %sum
}

; PR17380
; Make sure that the combined dags are legal if the DAGCombiner runs after
; Legalization took place. The add instruction is redundant and increases
; the number of uses of the zext by one. This prevents the transformation
; from firing before dags are legalized and optimized.
; Once the add is removed, the zext is left with a single use and therefore
; the dags are canonicalized. After Legalization, we need to make sure that
; the valuetype for the shift count is legal.
; Verify also that the shl-shr sequence is correctly folded into an
; AND with a bitmask.

; CHECK-LABEL: @g
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NEXT: jmp
define void @g(i32 %a) {
  %shifted = lshr i32 %a, 2
  %wide = zext i32 %shifted to i64
  ; Intentionally unused: this add only exists to give the zext a second use.
  %dead = add i64 %wide, 1
  %masked = shl i64 %wide, 2
  tail call void @f(i64 %masked)
  ret void
}

; External callee that receives the folded value from @g.
declare void @f(i64)