; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -aggressive-instcombine -S | FileCheck %s

; PR37098 - https://bugs.llvm.org/show_bug.cgi?id=37098

define i32 @anyset_two_bit_mask(i32 %x) {
; CHECK-LABEL: @anyset_two_bit_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 9
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %s = lshr i32 %x, 3
  %o = or i32 %s, %x
  %r = and i32 %o, 1
  ret i32 %r
}

define <2 x i32> @anyset_two_bit_mask_uniform(<2 x i32> %x) {
; CHECK-LABEL: @anyset_two_bit_mask_uniform(
; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 9, i32 9>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %s = lshr <2 x i32> %x, <i32 3, i32 3>
  %o = or <2 x i32> %s, %x
  %r = and <2 x i32> %o, <i32 1, i32 1>
  ret <2 x i32> %r
}

define i32 @anyset_four_bit_mask(i32 %x) {
; CHECK-LABEL: @anyset_four_bit_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 297
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %t1 = lshr i32 %x, 3
  %t2 = lshr i32 %x, 5
  %t3 = lshr i32 %x, 8
  %o1 = or i32 %t1, %x
  %o2 = or i32 %t2, %t3
  %o3 = or i32 %o1, %o2
  %r = and i32 %o3, 1
  ret i32 %r
}

define <2 x i32> @anyset_four_bit_mask_uniform(<2 x i32> %x) {
; CHECK-LABEL: @anyset_four_bit_mask_uniform(
; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 297, i32 297>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %t1 = lshr <2 x i32> %x, <i32 3, i32 3>
  %t2 = lshr <2 x i32> %x, <i32 5, i32 5>
  %t3 = lshr <2 x i32> %x, <i32 8, i32 8>
  %o1 = or <2 x i32> %t1, %x
  %o2 = or <2 x i32> %t2, %t3
  %o3 = or <2 x i32> %o1, %o2
  %r = and <2 x i32> %o3, <i32 1, i32 1>
  ret <2 x i32> %r
}

; We're not testing the LSB here, so all of the 'or' operands are shifts.

define i32 @anyset_three_bit_mask_all_shifted_bits(i32 %x) {
; CHECK-LABEL: @anyset_three_bit_mask_all_shifted_bits(
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 296
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %t1 = lshr i32 %x, 3
  %t2 = lshr i32 %x, 5
  %t3 = lshr i32 %x, 8
  %o2 = or i32 %t2, %t3
  %o3 = or i32 %t1, %o2
  %r = and i32 %o3, 1
  ret i32 %r
}

define <2 x i32> @anyset_three_bit_mask_all_shifted_bits_uniform(<2 x i32> %x) {
; CHECK-LABEL: @anyset_three_bit_mask_all_shifted_bits_uniform(
; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 296, i32 296>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %t1 = lshr <2 x i32> %x, <i32 3, i32 3>
  %t2 = lshr <2 x i32> %x, <i32 5, i32 5>
  %t3 = lshr <2 x i32> %x, <i32 8, i32 8>
  %o2 = or <2 x i32> %t2, %t3
  %o3 = or <2 x i32> %t1, %o2
  %r = and <2 x i32> %o3, <i32 1, i32 1>
  ret <2 x i32> %r
}

; Recognize the 'and' sibling pattern (all-bits-set). The 'and 1' may not be at the end.
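; For example, in the first test below, ((%x >> 7) & %x) & 1 is 1 only when
; bits 7 and 0 of %x are both set, so the expected fold is (%x & 129) == 129,
; where 129 == (1 << 0) | (1 << 7).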

define i32 @allset_two_bit_mask(i32 %x) {
; CHECK-LABEL: @allset_two_bit_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 129
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 129
; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %s = lshr i32 %x, 7
  %o = and i32 %s, %x
  %r = and i32 %o, 1
  ret i32 %r
}

define <2 x i32> @allset_two_bit_mask_uniform(<2 x i32> %x) {
; CHECK-LABEL: @allset_two_bit_mask_uniform(
; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 129, i32 129>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 129, i32 129>
; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %s = lshr <2 x i32> %x, <i32 7, i32 7>
  %o = and <2 x i32> %s, %x
  %r = and <2 x i32> %o, <i32 1, i32 1>
  ret <2 x i32> %r
}

define i64 @allset_four_bit_mask(i64 %x) {
; CHECK-LABEL: @allset_four_bit_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 30
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 30
; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i64
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %t1 = lshr i64 %x, 1
  %t2 = lshr i64 %x, 2
  %t3 = lshr i64 %x, 3
  %t4 = lshr i64 %x, 4
  %a1 = and i64 %t4, 1
  %a2 = and i64 %t2, %a1
  %a3 = and i64 %a2, %t1
  %r = and i64 %a3, %t3
  ret i64 %r
}

declare void @use(i32)

; negative test - extra use means the transform would increase instruction count

define i32 @allset_two_bit_mask_multiuse(i32 %x) {
; CHECK-LABEL: @allset_two_bit_mask_multiuse(
; CHECK-NEXT:    [[S:%.*]] = lshr i32 [[X:%.*]], 7
; CHECK-NEXT:    [[O:%.*]] = and i32 [[S]], [[X]]
; CHECK-NEXT:    [[R:%.*]] = and i32 [[O]], 1
; CHECK-NEXT:    call void @use(i32 [[O]])
; CHECK-NEXT:    ret i32 [[R]]
;
  %s = lshr i32 %x, 7
  %o = and i32 %s, %x
  %r = and i32 %o, 1
  call void @use(i32 %o)
  ret i32 %r
}

; negative test - missing 'and 1' mask, so more than the low bit is used here

define i8 @allset_three_bit_mask_no_and1(i8 %x) {
; CHECK-LABEL: @allset_three_bit_mask_no_and1(
; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[X:%.*]], 1
; CHECK-NEXT:    [[T2:%.*]] = lshr i8 [[X]], 2
; CHECK-NEXT:    [[T3:%.*]] = lshr i8 [[X]], 3
; CHECK-NEXT:    [[A2:%.*]] = and i8 [[T1]], [[T2]]
; CHECK-NEXT:    [[R:%.*]] = and i8 [[A2]], [[T3]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %t1 = lshr i8 %x, 1
  %t2 = lshr i8 %x, 2
  %t3 = lshr i8 %x, 3
  %a2 = and i8 %t1, %t2
  %r = and i8 %a2, %t3
  ret i8 %r
}

; This test demonstrates that the transform can be large. If the implementation
; is slow or explosive (stack overflow due to recursion), it should be made efficient.
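; The 40 'and' operands below each test one bit (bits 1 through 40 of %x), so
; the expected reduction uses the mask with bits 1-40 set:
; 2^41 - 2 = 2199023255550.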

define i64 @allset_40_bit_mask(i64 %x) {
; CHECK-LABEL: @allset_40_bit_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 2199023255550
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 2199023255550
; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i64
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %t1 = lshr i64 %x, 1
  %t2 = lshr i64 %x, 2
  %t3 = lshr i64 %x, 3
  %t4 = lshr i64 %x, 4
  %t5 = lshr i64 %x, 5
  %t6 = lshr i64 %x, 6
  %t7 = lshr i64 %x, 7
  %t8 = lshr i64 %x, 8
  %t9 = lshr i64 %x, 9
  %t10 = lshr i64 %x, 10
  %t11 = lshr i64 %x, 11
  %t12 = lshr i64 %x, 12
  %t13 = lshr i64 %x, 13
  %t14 = lshr i64 %x, 14
  %t15 = lshr i64 %x, 15
  %t16 = lshr i64 %x, 16
  %t17 = lshr i64 %x, 17
  %t18 = lshr i64 %x, 18
  %t19 = lshr i64 %x, 19
  %t20 = lshr i64 %x, 20
  %t21 = lshr i64 %x, 21
  %t22 = lshr i64 %x, 22
  %t23 = lshr i64 %x, 23
  %t24 = lshr i64 %x, 24
  %t25 = lshr i64 %x, 25
  %t26 = lshr i64 %x, 26
  %t27 = lshr i64 %x, 27
  %t28 = lshr i64 %x, 28
  %t29 = lshr i64 %x, 29
  %t30 = lshr i64 %x, 30
  %t31 = lshr i64 %x, 31
  %t32 = lshr i64 %x, 32
  %t33 = lshr i64 %x, 33
  %t34 = lshr i64 %x, 34
  %t35 = lshr i64 %x, 35
  %t36 = lshr i64 %x, 36
  %t37 = lshr i64 %x, 37
  %t38 = lshr i64 %x, 38
  %t39 = lshr i64 %x, 39
  %t40 = lshr i64 %x, 40

  %a1 = and i64 %t1, 1
  %a2 = and i64 %t2, %a1
  %a3 = and i64 %t3, %a2
  %a4 = and i64 %t4, %a3
  %a5 = and i64 %t5, %a4
  %a6 = and i64 %t6, %a5
  %a7 = and i64 %t7, %a6
  %a8 = and i64 %t8, %a7
  %a9 = and i64 %t9, %a8
  %a10 = and i64 %t10, %a9
  %a11 = and i64 %t11, %a10
  %a12 = and i64 %t12, %a11
  %a13 = and i64 %t13, %a12
  %a14 = and i64 %t14, %a13
  %a15 = and i64 %t15, %a14
  %a16 = and i64 %t16, %a15
  %a17 = and i64 %t17, %a16
  %a18 = and i64 %t18, %a17
  %a19 = and i64 %t19, %a18
  %a20 = and i64 %t20, %a19
  %a21 = and i64 %t21, %a20
  %a22 = and i64 %t22, %a21
  %a23 = and i64 %t23, %a22
  %a24 = and i64 %t24, %a23
  %a25 = and i64 %t25, %a24
  %a26 = and i64 %t26, %a25
  %a27 = and i64 %t27, %a26
  %a28 = and i64 %t28, %a27
  %a29 = and i64 %t29, %a28
  %a30 = and i64 %t30, %a29
  %a31 = and i64 %t31, %a30
  %a32 = and i64 %t32, %a31
  %a33 = and i64 %t33, %a32
  %a34 = and i64 %t34, %a33
  %a35 = and i64 %t35, %a34
  %a36 = and i64 %t36, %a35
  %a37 = and i64 %t37, %a36
  %a38 = and i64 %t38, %a37
  %a39 = and i64 %t39, %a38
  %a40 = and i64 %t40, %a39

  ret i64 %a40
}

; Verify that unsimplified code doesn't crash:
; https://bugs.llvm.org/show_bug.cgi?id=37446
; (The 'lshr' shift amount (33) exceeds the bit width, so the shift does not
; produce a well-defined value; the pass must leave this pattern alone without
; crashing.)

define i32 @PR37446(i32 %x) {
; CHECK-LABEL: @PR37446(
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 1, 33
; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHR]], 15
; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[AND]], [[X:%.*]]
; CHECK-NEXT:    ret i32 [[AND1]]
;
  %shr = lshr i32 1, 33
  %and = and i32 %shr, 15
  %and1 = and i32 %and, %x
  ret i32 %and1
}