; RUN: opt < %s -instcombine -S | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"

; These are UB-free rotate left/right patterns that are narrowed to a smaller bitwidth.
; See PR34046 and PR16726 for motivating examples:
; https://bugs.llvm.org/show_bug.cgi?id=34046
; https://bugs.llvm.org/show_bug.cgi?id=16726

define i16 @rotate_left_16bit(i16 %v, i32 %shift) {
; CHECK-LABEL: @rotate_left_16bit(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 %shift to i16
; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], 15
; CHECK-NEXT:    [[TMP3:%.*]] = sub i16 0, [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = and i16 [[TMP3]], 15
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i16 %v, [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = shl i16 %v, [[TMP2]]
; CHECK-NEXT:    [[CONV2:%.*]] = or i16 [[TMP5]], [[TMP6]]
; CHECK-NEXT:    ret i16 [[CONV2]]
;
  %and = and i32 %shift, 15
  %conv = zext i16 %v to i32
  %shl = shl i32 %conv, %and
  %sub = sub i32 16, %and
  %shr = lshr i32 %conv, %sub
  %or = or i32 %shr, %shl
  %conv2 = trunc i32 %or to i16
  ret i16 %conv2
}

; Commute the 'or' operands and try a vector type.

define <2 x i16> @rotate_left_commute_16bit_vec(<2 x i16> %v, <2 x i32> %shift) {
; CHECK-LABEL: @rotate_left_commute_16bit_vec(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> %shift to <2 x i16>
; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i16> [[TMP1]], <i16 15, i16 15>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <2 x i16> zeroinitializer, [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = and <2 x i16> [[TMP3]], <i16 15, i16 15>
; CHECK-NEXT:    [[TMP5:%.*]] = shl <2 x i16> %v, [[TMP2]]
; CHECK-NEXT:    [[TMP6:%.*]] = lshr <2 x i16> %v, [[TMP4]]
; CHECK-NEXT:    [[CONV2:%.*]] = or <2 x i16> [[TMP5]], [[TMP6]]
; CHECK-NEXT:    ret <2 x i16> [[CONV2]]
;
  %and = and <2 x i32> %shift, <i32 15, i32 15>
  %conv = zext <2 x i16> %v to <2 x i32>
  %shl = shl <2 x i32> %conv, %and
  %sub = sub <2 x i32> <i32 16, i32 16>, %and
  %shr = lshr <2 x i32> %conv, %sub
  %or = or <2 x i32> %shl, %shr
  %conv2 = trunc <2 x i32> %or to <2 x i16>
  ret <2 x i16> %conv2
}

; Change the size, rotation direction (the subtract is on the left-shift), and mask op.

define i8 @rotate_right_8bit(i8 %v, i3 %shift) {
; CHECK-LABEL: @rotate_right_8bit(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i3 %shift to i8
; CHECK-NEXT:    [[TMP2:%.*]] = sub i3 0, %shift
; CHECK-NEXT:    [[TMP3:%.*]] = zext i3 [[TMP2]] to i8
; CHECK-NEXT:    [[TMP4:%.*]] = shl i8 %v, [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i8 %v, [[TMP1]]
; CHECK-NEXT:    [[CONV2:%.*]] = or i8 [[TMP4]], [[TMP5]]
; CHECK-NEXT:    ret i8 [[CONV2]]
;
  %and = zext i3 %shift to i32
  %conv = zext i8 %v to i32
  %shr = lshr i32 %conv, %and
  %sub = sub i32 8, %and
  %shl = shl i32 %conv, %sub
  %or = or i32 %shl, %shr
  %conv2 = trunc i32 %or to i8
  ret i8 %conv2
}
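
; For reference, hypothetical C source that lowers to the promoted rotate-left
; pattern tested above (a sketch; the actual PR34046/PR16726 reproducers may
; differ, and the function name is made up):
;
;   unsigned short rotl16(unsigned short v, unsigned shift) {
;     unsigned amt = shift & 15;
;     /* 'v' is promoted to 32-bit int, so 'v >> (16 - amt)' is defined even
;        when amt == 0: the zero-extended value just shifts out to zero. */
;     return (unsigned short)((v << amt) | (v >> (16 - amt)));
;   }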

; The shifted value does not need to be a zexted value; here it is masked.
; The shift mask could be less than the bitwidth, but this is still ok.

define i8 @rotate_right_commute_8bit(i32 %v, i32 %shift) {
; CHECK-LABEL: @rotate_right_commute_8bit(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 %shift to i8
; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw i8 0, [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = and i8 [[TMP3]], 7
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 %v to i8
; CHECK-NEXT:    [[TMP6:%.*]] = lshr i8 [[TMP5]], [[TMP2]]
; CHECK-NEXT:    [[TMP7:%.*]] = shl i8 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[CONV2:%.*]] = or i8 [[TMP6]], [[TMP7]]
; CHECK-NEXT:    ret i8 [[CONV2]]
;
  %and = and i32 %shift, 3
  %conv = and i32 %v, 255
  %shr = lshr i32 %conv, %and
  %sub = sub i32 8, %and
  %shl = shl i32 %conv, %sub
  %or = or i32 %shr, %shl
  %conv2 = trunc i32 %or to i8
  ret i8 %conv2
}

; If the original source does not mask the shift amount,
; we still do the transform by adding masks to make it safe.

define i8 @rotate8_not_safe(i8 %v, i32 %shamt) {
; CHECK-LABEL: @rotate8_not_safe(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 %shamt to i8
; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 0, [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP1]], 7
; CHECK-NEXT:    [[TMP4:%.*]] = and i8 [[TMP2]], 7
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i8 %v, [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = shl i8 %v, [[TMP3]]
; CHECK-NEXT:    [[RET:%.*]] = or i8 [[TMP5]], [[TMP6]]
; CHECK-NEXT:    ret i8 [[RET]]
;
  %conv = zext i8 %v to i32
  %sub = sub i32 8, %shamt
  %shr = lshr i32 %conv, %sub
  %shl = shl i32 %conv, %shamt
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i8
  ret i8 %ret
}
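
; In C terms, @rotate8_not_safe corresponds to something like this hypothetical
; source, where nothing clamps the shift amount (a sketch, not taken from the
; PRs):
;
;   unsigned char rotl8(unsigned char v, unsigned shamt) {
;     /* With no '& 7' mask, any shamt outside [0, 8] puts at least one of the
;        promoted 32-bit shifts out of range: undefined behavior in C, poison
;        in IR. That is why InstCombine inserts its own masks when it narrows
;        this to 8 bits. */
;     return (unsigned char)((v >> (8 - shamt)) | (v << shamt));
;   }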

; The next two tests check narrowing of (v >> (shamt & 15)) | (v << (-shamt & 15))
; when types have been promoted.
; FIXME: We should be able to narrow this.

define i16 @rotate16_neg_mask(i16 %v, i16 %shamt) {
; CHECK-LABEL: @rotate16_neg_mask(
; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[V:%.*]] to i32
; CHECK-NEXT:    [[RSHAMT:%.*]] = and i16 [[SHAMT:%.*]], 15
; CHECK-NEXT:    [[RSHAMTCONV:%.*]] = zext i16 [[RSHAMT]] to i32
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[RSHAMTCONV]]
; CHECK-NEXT:    [[NEG:%.*]] = sub i16 0, [[SHAMT]]
; CHECK-NEXT:    [[LSHAMT:%.*]] = and i16 [[NEG]], 15
; CHECK-NEXT:    [[LSHAMTCONV:%.*]] = zext i16 [[LSHAMT]] to i32
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[LSHAMTCONV]]
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i16
; CHECK-NEXT:    ret i16 [[RET]]
;
  %conv = zext i16 %v to i32
  %rshamt = and i16 %shamt, 15
  %rshamtconv = zext i16 %rshamt to i32
  %shr = lshr i32 %conv, %rshamtconv
  %neg = sub i16 0, %shamt
  %lshamt = and i16 %neg, 15
  %lshamtconv = zext i16 %lshamt to i32
  %shl = shl i32 %conv, %lshamtconv
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i16
  ret i16 %ret
}

define i8 @rotate8_neg_mask(i8 %v, i8 %shamt) {
; CHECK-LABEL: @rotate8_neg_mask(
; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[V:%.*]] to i32
; CHECK-NEXT:    [[RSHAMT:%.*]] = and i8 [[SHAMT:%.*]], 7
; CHECK-NEXT:    [[RSHAMTCONV:%.*]] = zext i8 [[RSHAMT]] to i32
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[RSHAMTCONV]]
; CHECK-NEXT:    [[NEG:%.*]] = sub i8 0, [[SHAMT]]
; CHECK-NEXT:    [[LSHAMT:%.*]] = and i8 [[NEG]], 7
; CHECK-NEXT:    [[LSHAMTCONV:%.*]] = zext i8 [[LSHAMT]] to i32
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[LSHAMTCONV]]
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i8
; CHECK-NEXT:    ret i8 [[RET]]
;
  %conv = zext i8 %v to i32
  %rshamt = and i8 %shamt, 7
  %rshamtconv = zext i8 %rshamt to i32
  %shr = lshr i32 %conv, %rshamtconv
  %neg = sub i8 0, %shamt
  %lshamt = and i8 %neg, 7
  %lshamtconv = zext i8 %lshamt to i32
  %shl = shl i32 %conv, %lshamtconv
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i8
  ret i8 %ret
}
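
; The negated-mask idiom in the two tests above looks like this in hypothetical
; C source (a sketch; both shift amounts stay in [0, 7], so the pattern is
; UB-free even before InstCombine touches it):
;
;   unsigned char rotr8_safe(unsigned char v, unsigned char shamt) {
;     /* '-shamt & 7' computes (8 - shamt) mod 8, so shamt == 0 maps to a
;        left-shift of 0 rather than an out-of-range shift of 8. */
;     return (unsigned char)((v >> (shamt & 7)) | (v << (-shamt & 7)));
;   }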

; The next two tests have a shift amount that is already i32, so we would still
; need a truncate for it going into the rotate pattern.
; FIXME: We can narrow this, but we would still need a trunc on the shift amt.

define i16 @rotate16_neg_mask_wide_amount(i16 %v, i32 %shamt) {
; CHECK-LABEL: @rotate16_neg_mask_wide_amount(
; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[V:%.*]] to i32
; CHECK-NEXT:    [[RSHAMT:%.*]] = and i32 [[SHAMT:%.*]], 15
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[RSHAMT]]
; CHECK-NEXT:    [[NEG:%.*]] = sub i32 0, [[SHAMT]]
; CHECK-NEXT:    [[LSHAMT:%.*]] = and i32 [[NEG]], 15
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[LSHAMT]]
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i16
; CHECK-NEXT:    ret i16 [[RET]]
;
  %conv = zext i16 %v to i32
  %rshamt = and i32 %shamt, 15
  %shr = lshr i32 %conv, %rshamt
  %neg = sub i32 0, %shamt
  %lshamt = and i32 %neg, 15
  %shl = shl i32 %conv, %lshamt
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i16
  ret i16 %ret
}

define i8 @rotate8_neg_mask_wide_amount(i8 %v, i32 %shamt) {
; CHECK-LABEL: @rotate8_neg_mask_wide_amount(
; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[V:%.*]] to i32
; CHECK-NEXT:    [[RSHAMT:%.*]] = and i32 [[SHAMT:%.*]], 7
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[RSHAMT]]
; CHECK-NEXT:    [[NEG:%.*]] = sub i32 0, [[SHAMT]]
; CHECK-NEXT:    [[LSHAMT:%.*]] = and i32 [[NEG]], 7
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[LSHAMT]]
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i8
; CHECK-NEXT:    ret i8 [[RET]]
;
  %conv = zext i8 %v to i32
  %rshamt = and i32 %shamt, 7
  %shr = lshr i32 %conv, %rshamt
  %neg = sub i32 0, %shamt
  %lshamt = and i32 %neg, 7
  %shl = shl i32 %conv, %lshamt
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i8
  ret i8 %ret
}
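
; The wide-amount tests above differ from the earlier neg-mask tests only in
; that the rotate amount arrives as a 32-bit value, as in this hypothetical C
; sketch; narrowing the rotate itself would therefore still require a trunc of
; the shift amount:
;
;   unsigned char rotr8_wide(unsigned char v, unsigned shamt) {
;     /* 'shamt' stays 32-bit; only the rotated value is 8-bit. */
;     return (unsigned char)((v >> (shamt & 7)) | (v << (-shamt & 7)));
;   }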