; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; Funnel-shift intrinsics used by the tests in this file.
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i33 @llvm.fshr.i33(i33, i33, i33)
declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
declare <2 x i31> @llvm.fshl.v2i31(<2 x i31>, <2 x i31>, <2 x i31>)

; If the shift mask doesn't include any demanded bits, the funnel shift can be eliminated.
; For a 32-bit element only the low 5 bits of the shift amount matter; the
; masks 32 and 64 below clear all of them, so the shift amount is effectively 0.

; fshl by 0 yields the first operand.
define i32 @fshl_mask_simplify1(i32 %x, i32 %y, i32 %sh) {
; CHECK-LABEL: @fshl_mask_simplify1(
; CHECK-NEXT:    ret i32 [[X:%.*]]
;
  %maskedsh = and i32 %sh, 32
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
  ret i32 %r
}

; fshr by 0 yields the second operand.
define <2 x i32> @fshr_mask_simplify2(<2 x i32> %x, <2 x i32> %y, <2 x i32> %sh) {
; CHECK-LABEL: @fshr_mask_simplify2(
; CHECK-NEXT:    ret <2 x i32> [[Y:%.*]]
;
  %maskedsh = and <2 x i32> %sh, <i32 64, i32 64>
  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %maskedsh)
  ret <2 x i32> %r
}

; Negative test.
; The mask 16 keeps bit 4 of the shift amount, which is one of the low 5 bits
; that a 32-bit funnel shift uses, so nothing can be removed.

define i32 @fshl_mask_simplify3(i32 %x, i32 %y, i32 %sh) {
; CHECK-LABEL: @fshl_mask_simplify3(
; CHECK-NEXT:    [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 16
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]])
; CHECK-NEXT:    ret i32 [[R]]
;
  %maskedsh = and i32 %sh, 16
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
  ret i32 %r
}

; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
; With a 33-bit type a shift amount of 64 is 64 mod 33 = 31, not 0, so the
; mask and the funnel shift must both stay.

define i33 @fshr_mask_simplify1(i33 %x, i33 %y, i33 %sh) {
; CHECK-LABEL: @fshr_mask_simplify1(
; CHECK-NEXT:    [[MASKEDSH:%.*]] = and i33 [[SH:%.*]], 64
; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 [[MASKEDSH]])
; CHECK-NEXT:    ret i33 [[R]]
;
  %maskedsh = and i33 %sh, 64
  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 %maskedsh)
  ret i33 %r
}

; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
; With 31-bit elements a shift amount of 32 is 32 mod 31 = 1.

define <2 x i31> @fshl_mask_simplify2(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) {
; CHECK-LABEL: @fshl_mask_simplify2(
; CHECK-NEXT:    [[MASKEDSH:%.*]] = and <2 x i31> [[SH:%.*]], <i31 32, i31 32>
; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> [[MASKEDSH]])
; CHECK-NEXT:    ret <2 x i31> [[R]]
;
  %maskedsh = and <2 x i31> %sh, <i31 32, i31 32>
  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> %maskedsh)
  ret <2 x i31> %r
}

; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
; With a 33-bit type a shift amount of 32 is a real shift by 32.

define i33 @fshr_mask_simplify3(i33 %x, i33 %y, i33 %sh) {
; CHECK-LABEL: @fshr_mask_simplify3(
; CHECK-NEXT:    [[MASKEDSH:%.*]] = and i33 [[SH:%.*]], 32
; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 [[MASKEDSH]])
; CHECK-NEXT:    ret i33 [[R]]
;
  %maskedsh = and i33 %sh, 32
  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 %maskedsh)
  ret i33 %r
}

; This mask op is unnecessary.
; The mask 31 keeps exactly the low 5 bits, which is all a 32-bit funnel shift
; reads from its shift amount.

define i32 @fshl_mask_not_required(i32 %x, i32 %y, i32 %sh) {
; CHECK-LABEL: @fshl_mask_not_required(
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[SH:%.*]])
; CHECK-NEXT:    ret i32 [[R]]
;
  %maskedsh = and i32 %sh, 31
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
  ret i32 %r
}

; This mask op can be reduced.
; Bit 5 of the mask 33 is not used by a 32-bit funnel shift, so the mask
; constant shrinks to 1.

define i32 @fshl_mask_reduce_constant(i32 %x, i32 %y, i32 %sh) {
; CHECK-LABEL: @fshl_mask_reduce_constant(
; CHECK-NEXT:    [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 1
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]])
; CHECK-NEXT:    ret i32 [[R]]
;
  %maskedsh = and i32 %sh, 33
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
  ret i32 %r
}

; But this mask op is required.
; The mask 15 clears bit 4, which a 32-bit funnel shift does read.

define i32 @fshl_mask_negative(i32 %x, i32 %y, i32 %sh) {
; CHECK-LABEL: @fshl_mask_negative(
; CHECK-NEXT:    [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 15
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]])
; CHECK-NEXT:    ret i32 [[R]]
;
  %maskedsh = and i32 %sh, 15
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
  ret i32 %r
}

; The transform is not limited to mask ops.
; Here an 'or' sets bit 5 of the shift amount; that bit is not used by a
; 32-bit funnel shift, so the 'or' is dropped.

define <2 x i32> @fshr_set_but_not_demanded_vec(<2 x i32> %x, <2 x i32> %y, <2 x i32> %sh) {
; CHECK-LABEL: @fshr_set_but_not_demanded_vec(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[SH:%.*]])
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %bogusbits = or <2 x i32> %sh, <i32 32, i32 32>
  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %bogusbits)
  ret <2 x i32> %r
}

; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
; With 31-bit elements, setting bit 5 changes the shift amount modulo 31, so
; the 'or' has to stay.

define <2 x i31> @fshl_set_but_not_demanded_vec(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) {
; CHECK-LABEL: @fshl_set_but_not_demanded_vec(
; CHECK-NEXT:    [[BOGUSBITS:%.*]] = or <2 x i31> [[SH:%.*]], <i31 32, i31 32>
; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> [[BOGUSBITS]])
; CHECK-NEXT:    ret <2 x i31> [[R]]
;
  %bogusbits = or <2 x i31> %sh, <i31 32, i31 32>
  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> %bogusbits)
  ret <2 x i31> %r
}

; Simplify one undef or zero operand and constant shift amount.
; The funnel shift becomes a plain shift of the remaining operand:
;   fshl(0/undef, x, c) -> lshr x, (bitwidth - c)
;   fshl(x, 0/undef, c) -> shl  x, c
;   fshr(0/undef, x, c) -> lshr x, c
;   fshr(x, 0/undef, c) -> shl  x, (bitwidth - c)

define i32 @fshl_op0_undef(i32 %x) {
; CHECK-LABEL: @fshl_op0_undef(
; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[X:%.*]], 25
; CHECK-NEXT:    ret i32 [[R]]
;
  %r = call i32 @llvm.fshl.i32(i32 undef, i32 %x, i32 7)
  ret i32 %r
}

define i32 @fshl_op0_zero(i32 %x) {
; CHECK-LABEL: @fshl_op0_zero(
; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[X:%.*]], 25
; CHECK-NEXT:    ret i32 [[R]]
;
  %r = call i32 @llvm.fshl.i32(i32 0, i32 %x, i32 7)
  ret i32 %r
}

define i33 @fshr_op0_undef(i33 %x) {
; CHECK-LABEL: @fshr_op0_undef(
; CHECK-NEXT:    [[R:%.*]] = lshr i33 [[X:%.*]], 7
; CHECK-NEXT:    ret i33 [[R]]
;
  %r = call i33 @llvm.fshr.i33(i33 undef, i33 %x, i33 7)
  ret i33 %r
}

define i33 @fshr_op0_zero(i33 %x) {
; CHECK-LABEL: @fshr_op0_zero(
; CHECK-NEXT:    [[R:%.*]] = lshr i33 [[X:%.*]], 7
; CHECK-NEXT:    ret i33 [[R]]
;
  %r = call i33 @llvm.fshr.i33(i33 0, i33 %x, i33 7)
  ret i33 %r
}

define i32 @fshl_op1_undef(i32 %x) {
; CHECK-LABEL: @fshl_op1_undef(
; CHECK-NEXT:    [[R:%.*]] = shl i32 [[X:%.*]], 7
; CHECK-NEXT:    ret i32 [[R]]
;
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 undef, i32 7)
  ret i32 %r
}

define i32 @fshl_op1_zero(i32 %x) {
; CHECK-LABEL: @fshl_op1_zero(
; CHECK-NEXT:    [[R:%.*]] = shl i32 [[X:%.*]], 7
; CHECK-NEXT:    ret i32 [[R]]
;
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 7)
  ret i32 %r
}

define i33 @fshr_op1_undef(i33 %x) {
; CHECK-LABEL: @fshr_op1_undef(
; CHECK-NEXT:    [[R:%.*]] = shl i33 [[X:%.*]], 26
; CHECK-NEXT:    ret i33 [[R]]
;
  %r = call i33 @llvm.fshr.i33(i33 %x, i33 undef, i33 7)
  ret i33 %r
}

define i33 @fshr_op1_zero(i33 %x) {
; CHECK-LABEL: @fshr_op1_zero(
; CHECK-NEXT:    [[R:%.*]] = shl i33 [[X:%.*]], 26
; CHECK-NEXT:    ret i33 [[R]]
;
  %r = call i33 @llvm.fshr.i33(i33 %x, i33 0, i33 7)
  ret i33 %r
}

; Same folds with splat-constant vector shift amounts.

define <2 x i31> @fshl_op0_zero_splat_vec(<2 x i31> %x) {
; CHECK-LABEL: @fshl_op0_zero_splat_vec(
; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i31> [[X:%.*]], <i31 24, i31 24>
; CHECK-NEXT:    ret <2 x i31> [[R]]
;
  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> zeroinitializer, <2 x i31> %x, <2 x i31> <i31 7, i31 7>)
  ret <2 x i31> %r
}

define <2 x i31> @fshl_op1_undef_splat_vec(<2 x i31> %x) {
; CHECK-LABEL: @fshl_op1_undef_splat_vec(
; CHECK-NEXT:    [[R:%.*]] = shl <2 x i31> [[X:%.*]], <i31 7, i31 7>
; CHECK-NEXT:    ret <2 x i31> [[R]]
;
  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> undef, <2 x i31> <i31 7, i31 7>)
  ret <2 x i31> %r
}

define <2 x i32> @fshr_op0_undef_splat_vec(<2 x i32> %x) {
; CHECK-LABEL: @fshr_op0_undef_splat_vec(
; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 7, i32 7>
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> undef, <2 x i32> %x, <2 x i32> <i32 7, i32 7>)
  ret <2 x i32> %r
}

define <2 x i32> @fshr_op1_zero_splat_vec(<2 x i32> %x) {
; CHECK-LABEL: @fshr_op1_zero_splat_vec(
; CHECK-NEXT:    [[R:%.*]] = shl <2 x i32> [[X:%.*]], <i32 25, i32 25>
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> zeroinitializer, <2 x i32> <i32 7, i32 7>)
  ret <2 x i32> %r
}

; Same folds with non-splat, out-of-range shift amounts. Each lane is reduced
; modulo the element width first: for i31, -1 (2^31 - 1) becomes 1 and 33
; becomes 2; for i32, -1 becomes 31 and 33 becomes 1.

define <2 x i31> @fshl_op0_zero_vec(<2 x i31> %x) {
; CHECK-LABEL: @fshl_op0_zero_vec(
; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i31> [[X:%.*]], <i31 30, i31 29>
; CHECK-NEXT:    ret <2 x i31> [[R]]
;
  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> zeroinitializer, <2 x i31> %x, <2 x i31> <i31 -1, i31 33>)
  ret <2 x i31> %r
}

define <2 x i31> @fshl_op1_undef_vec(<2 x i31> %x) {
; CHECK-LABEL: @fshl_op1_undef_vec(
; CHECK-NEXT:    [[R:%.*]] = shl <2 x i31> [[X:%.*]], <i31 1, i31 2>
; CHECK-NEXT:    ret <2 x i31> [[R]]
;
  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> undef, <2 x i31> <i31 -1, i31 33>)
  ret <2 x i31> %r
}

define <2 x i32> @fshr_op0_undef_vec(<2 x i32> %x) {
; CHECK-LABEL: @fshr_op0_undef_vec(
; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 1>
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> undef, <2 x i32> %x, <2 x i32> <i32 -1, i32 33>)
  ret <2 x i32> %r
}

define <2 x i32> @fshr_op1_zero_vec(<2 x i32> %x) {
; CHECK-LABEL: @fshr_op1_zero_vec(
; CHECK-NEXT:    [[R:%.*]] = shl <2 x i32> [[X:%.*]], <i32 1, i32 31>
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> zeroinitializer, <2 x i32> <i32 -1, i32 33>)
  ret <2 x i32> %r
}

; Only demand bits from one of the operands.
; fshl(x, y, 7) on i32 is (x << 7) | (y >> 25): result bits 7 and up come from
; %x, bits 6..0 come from %y. The user's mask selects which operand is needed.

define i32 @fshl_only_op0_demanded(i32 %x, i32 %y) {
; CHECK-LABEL: @fshl_only_op0_demanded(
; CHECK-NEXT:    [[Z:%.*]] = shl i32 [[X:%.*]], 7
; CHECK-NEXT:    [[R:%.*]] = and i32 [[Z]], 128
; CHECK-NEXT:    ret i32 [[R]]
;
  %z = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
  %r = and i32 %z, 128
  ret i32 %r
}

define i32 @fshl_only_op1_demanded(i32 %x, i32 %y) {
; CHECK-LABEL: @fshl_only_op1_demanded(
; CHECK-NEXT:    [[Z:%.*]] = lshr i32 [[Y:%.*]], 25
; CHECK-NEXT:    [[R:%.*]] = and i32 [[Z]], 63
; CHECK-NEXT:    ret i32 [[R]]
;
  %z = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
  %r = and i32 %z, 63
  ret i32 %r
}

; fshr(x, y, 7) on i33 is (x << 26) | (y >> 7): result bits 25..0 come from
; %y, bits 32..26 come from %x.

define i33 @fshr_only_op1_demanded(i33 %x, i33 %y) {
; CHECK-LABEL: @fshr_only_op1_demanded(
; CHECK-NEXT:    [[Z:%.*]] = lshr i33 [[Y:%.*]], 7
; CHECK-NEXT:    [[R:%.*]] = and i33 [[Z]], 12392
; CHECK-NEXT:    ret i33 [[R]]
;
  %z = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 7)
  %r = and i33 %z, 12392
  ret i33 %r
}

; The lshr by 30 keeps only result bits 32..30, which are bits 6..4 of %x.

define i33 @fshr_only_op0_demanded(i33 %x, i33 %y) {
; CHECK-LABEL: @fshr_only_op0_demanded(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i33 [[X:%.*]], 4
; CHECK-NEXT:    [[R:%.*]] = and i33 [[TMP1]], 7
; CHECK-NEXT:    ret i33 [[R]]
;
  %z = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 7)
  %r = lshr i33 %z, 30
  ret i33 %r
}

define <2 x i31> @fshl_only_op1_demanded_vec_splat(<2 x i31> %x, <2 x i31> %y) {
; CHECK-LABEL: @fshl_only_op1_demanded_vec_splat(
; CHECK-NEXT:    [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], <i31 24, i31 24>
; CHECK-NEXT:    [[R:%.*]] = and <2 x i31> [[Z]], <i31 63, i31 31>
; CHECK-NEXT:    ret <2 x i31> [[R]]
;
  %z = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 7, i31 7>)
  %r = and <2 x i31> %z, <i31 63, i31 31>
  ret <2 x i31> %r
}

; Constant shift amounts are reduced modulo the bitwidth: 33 mod 32 = 1.

define i32 @fshl_constant_shift_amount_modulo_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: @fshl_constant_shift_amount_modulo_bitwidth(
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 1)
; CHECK-NEXT:    ret i32 [[R]]
;
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 33)
  ret i32 %r
}

; 34 mod 33 = 1; the expected output also rewrites fshr by 1 as fshl by 32
; (bitwidth - 1).

define i33 @fshr_constant_shift_amount_modulo_bitwidth(i33 %x, i33 %y) {
; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth(
; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 32)
; CHECK-NEXT:    ret i33 [[R]]
;
  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 34)
  ret i33 %r
}

; An undef shift amount folds to the operand that a zero shift would return:
; the first operand for fshl, the second for fshr.

define i32 @fshl_undef_shift_amount(i32 %x, i32 %y) {
; CHECK-LABEL: @fshl_undef_shift_amount(
; CHECK-NEXT:    ret i32 [[X:%.*]]
;
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 undef)
  ret i32 %r
}

define i33 @fshr_undef_shift_amount(i33 %x, i33 %y) {
; CHECK-LABEL: @fshr_undef_shift_amount(
; CHECK-NEXT:    ret i33 [[Y:%.*]]
;
  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 undef)
  ret i33 %r
}

; Global whose address supplies a constant-expression shift amount.
@external_global = external global i8

; A constant-expression shift amount has no known value, so the call is
; expected to be left unchanged.

define i33 @fshr_constant_shift_amount_modulo_bitwidth_constexpr(i33 %x, i33 %y) {
; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth_constexpr(
; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 ptrtoint (i8* @external_global to i33))
; CHECK-NEXT:    ret i33 [[R]]
;
  %shamt = ptrtoint i8* @external_global to i33
  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 %shamt)
  ret i33 %r
}

; Per-lane reduction for i32: 34 -> 2 and -1 -> 31; fshr by <2, 31> is then
; expressed as fshl by <30, 1>.

define <2 x i32> @fshr_constant_shift_amount_modulo_bitwidth_vec(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth_vec(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> <i32 30, i32 1>)
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 34, i32 -1>)
  ret <2 x i32> %r
}

; Per-lane reduction for i31: 34 -> 3 and -1 (2^31 - 1) -> 1.

define <2 x i31> @fshl_constant_shift_amount_modulo_bitwidth_vec(<2 x i31> %x, <2 x i31> %y) {
; CHECK-LABEL: @fshl_constant_shift_amount_modulo_bitwidth_vec(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> <i31 3, i31 1>)
; CHECK-NEXT:    ret <2 x i31> [[R]]
;
  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 34, i31 -1>)
  ret <2 x i31> %r
}

; When one lane is a constant expression the whole shift-amount vector is left
; alone, including the out-of-range 34 in lane 0.
; NOTE(review): %shamt is never used; the call spells the ptrtoint inline.

define <2 x i31> @fshl_constant_shift_amount_modulo_bitwidth_vec_const_expr(<2 x i31> %x, <2 x i31> %y) {
; CHECK-LABEL: @fshl_constant_shift_amount_modulo_bitwidth_vec_const_expr(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> <i31 34, i31 ptrtoint (i8* @external_global to i31)>)
; CHECK-NEXT:    ret <2 x i31> [[R]]
;
  %shamt = ptrtoint i8* @external_global to i31
  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 34, i31 ptrtoint (i8* @external_global to i31)>)
  ret <2 x i31> %r
}

define <2 x i31> @fshl_undef_shift_amount_vec(<2 x i31> %x, <2 x i31> %y) {
; CHECK-LABEL: @fshl_undef_shift_amount_vec(
; CHECK-NEXT:    ret <2 x i31> [[X:%.*]]
;
  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> undef)
  ret <2 x i31> %r
}

define <2 x i32> @fshr_undef_shift_amount_vec(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @fshr_undef_shift_amount_vec(
; CHECK-NEXT:    ret <2 x i32> [[Y:%.*]]
;
  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> undef)
  ret <2 x i32> %r
}

; TODO: Don't let SimplifyDemandedBits split up a rotate - keep the same operand.
; Both operands of the input call are the same value %x (a rotate). In the
; expected output the second operand has been replaced by %a0, so the call is
; no longer a rotate.

define i32 @rotl_common_demanded(i32 %a0) {
; CHECK-LABEL: @rotl_common_demanded(
; CHECK-NEXT:    [[X:%.*]] = xor i32 [[A0:%.*]], 2
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[A0]], i32 8)
; CHECK-NEXT:    ret i32 [[R]]
;
  %x = xor i32 %a0, 2
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 8)
  ret i32 %r
}

; Same situation for fshr on i33; fshr by 8 is also rewritten as fshl by 25.

define i33 @rotr_common_demanded(i33 %a0) {
; CHECK-LABEL: @rotr_common_demanded(
; CHECK-NEXT:    [[X:%.*]] = xor i33 [[A0:%.*]], 2
; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X]], i33 [[A0]], i33 25)
; CHECK-NEXT:    ret i33 [[R]]
;
  %x = xor i33 %a0, 2
  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %x, i33 8)
  ret i33 %r
}

; The shift modulo bitwidth is the same for all vector elements.
; (7 and 38 are both 7 modulo 31.)

define <2 x i31> @fshl_only_op1_demanded_vec_nonsplat(<2 x i31> %x, <2 x i31> %y) {
; CHECK-LABEL: @fshl_only_op1_demanded_vec_nonsplat(
; CHECK-NEXT:    [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], <i31 24, i31 24>
; CHECK-NEXT:    [[R:%.*]] = and <2 x i31> [[Z]], <i31 63, i31 31>
; CHECK-NEXT:    ret <2 x i31> [[R]]
;
  %z = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 7, i31 38>)
  %r = and <2 x i31> %z, <i31 63, i31 31>
  ret <2 x i31> %r
}

; Rotates (both operands equal) with out-of-range constant shift amounts: the
; amount is reduced modulo the bitwidth and the rotate form is kept.

define i32 @rotl_constant_shift_amount(i32 %x) {
; CHECK-LABEL: @rotl_constant_shift_amount(
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 1)
; CHECK-NEXT:    ret i32 [[R]]
;
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 33)
  ret i32 %r
}

; For i31, both 32 and -1 (2^31 - 1) reduce to 1.

define <2 x i31> @rotl_constant_shift_amount_vec(<2 x i31> %x) {
; CHECK-LABEL: @rotl_constant_shift_amount_vec(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[X]], <2 x i31> <i31 1, i31 1>)
; CHECK-NEXT:    ret <2 x i31> [[R]]
;
  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %x, <2 x i31> <i31 32, i31 -1>)
  ret <2 x i31> %r
}

; 34 mod 33 = 1; rotate right by 1 is expressed as rotate left by 32.

define i33 @rotr_constant_shift_amount(i33 %x) {
; CHECK-LABEL: @rotr_constant_shift_amount(
; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X:%.*]], i33 [[X]], i33 32)
; CHECK-NEXT:    ret i33 [[R]]
;
  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %x, i33 34)
  ret i33 %r
}

; For i32, 33 -> 1 and -1 -> 31; rotate right by <1, 31> is rotate left by <31, 1>.

define <2 x i32> @rotr_constant_shift_amount_vec(<2 x i32> %x) {
; CHECK-LABEL: @rotr_constant_shift_amount_vec(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 31, i32 1>)
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 33, i32 -1>)
  ret <2 x i32> %r
}

; Demand bits from both operands -- cannot simplify.
; The mask 192 covers result bits 7 and 6, which come from different operands
; when the left-shift amount is 7.

define i32 @fshl_both_ops_demanded(i32 %x, i32 %y) {
; CHECK-LABEL: @fshl_both_ops_demanded(
; CHECK-NEXT:    [[Z:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 7)
; CHECK-NEXT:    [[R:%.*]] = and i32 [[Z]], 192
; CHECK-NEXT:    ret i32 [[R]]
;
  %z = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
  %r = and i32 %z, 192
  ret i32 %r
}

; fshr by 26 on i33 is the same as fshl by 7, which is how the expected output
; spells it.

define i33 @fshr_both_ops_demanded(i33 %x, i33 %y) {
; CHECK-LABEL: @fshr_both_ops_demanded(
; CHECK-NEXT:    [[Z:%.*]] = call i33 @llvm.fshl.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 7)
; CHECK-NEXT:    [[R:%.*]] = and i33 [[Z]], 192
; CHECK-NEXT:    ret i33 [[R]]
;
  %z = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 26)
  %r = and i33 %z, 192
  ret i33 %r
}

; Both operands are demanded, but there are known bits.
; Result bit 7 is bit 0 of %x2 (known one) and result bit 6 is the top bit of
; %y2 (known zero), so masking with 192 always yields 128.

define i32 @fshl_known_bits(i32 %x, i32 %y) {
; CHECK-LABEL: @fshl_known_bits(
; CHECK-NEXT:    ret i32 128
;
  %x2 = or i32 %x, 1   ; lo bit set
  %y2 = lshr i32 %y, 1 ; hi bit clear
  %z = call i32 @llvm.fshl.i32(i32 %x2, i32 %y2, i32 7)
  %r = and i32 %z, 192
  ret i32 %r
}

; Same as above for i33: fshr by 26 equals fshl by 7, so the known bits land
; in the same result positions.

define i33 @fshr_known_bits(i33 %x, i33 %y) {
; CHECK-LABEL: @fshr_known_bits(
; CHECK-NEXT:    ret i33 128
;
  %x2 = or i33 %x, 1   ; lo bit set
  %y2 = lshr i33 %y, 1 ; hi bit clear
  %z = call i33 @llvm.fshr.i33(i33 %x2, i33 %y2, i33 26)
  %r = and i33 %z, 192
  ret i33 %r
}

; This case fails to simplify due to multiple uses.
; %b feeds both the lshr and the xor. The only change in the expected output
; is that rotate-right by 1 is written as rotate-left by 32.

define i33 @fshr_multi_use(i33 %a) {
; CHECK-LABEL: @fshr_multi_use(
; CHECK-NEXT:    [[B:%.*]] = call i33 @llvm.fshl.i33(i33 [[A:%.*]], i33 [[A]], i33 32)
; CHECK-NEXT:    [[C:%.*]] = lshr i33 [[B]], 23
; CHECK-NEXT:    [[D:%.*]] = xor i33 [[C]], [[B]]
; CHECK-NEXT:    [[E:%.*]] = and i33 [[D]], 31
; CHECK-NEXT:    ret i33 [[E]]
;
  %b = tail call i33 @llvm.fshr.i33(i33 %a, i33 %a, i33 1)
  %c = lshr i33 %b, 23
  %d = xor i33 %c, %b
  %e = and i33 %d, 31
  ret i33 %e
}

; This demonstrates the same simplification working if the fshr intrinsic
; is expanded into shifts and or.
; NOTE(review): the expected output below is identical to that of
; @fshr_multi_use (the shl/lshr/or sequence is recognized as a rotate), so the
; "fails to simplify" / "simplification working" wording around these two
; tests may be stale -- confirm.

define i33 @expanded_fshr_multi_use(i33 %a) {
; CHECK-LABEL: @expanded_fshr_multi_use(
; CHECK-NEXT:    [[B:%.*]] = call i33 @llvm.fshl.i33(i33 [[A:%.*]], i33 [[A]], i33 32)
; CHECK-NEXT:    [[C:%.*]] = lshr i33 [[B]], 23
; CHECK-NEXT:    [[D:%.*]] = xor i33 [[C]], [[B]]
; CHECK-NEXT:    [[E:%.*]] = and i33 [[D]], 31
; CHECK-NEXT:    ret i33 [[E]]
;
  %t = lshr i33 %a, 1
  %t2 = shl i33 %a, 32
  %b = or i33 %t, %t2
  %c = lshr i33 %b, 23
  %d = xor i33 %c, %b
  %e = and i33 %d, 31
  ret i33 %e
}

; 16-bit funnel-shift intrinsics used by the bswap tests.
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare <3 x i16> @llvm.fshl.v3i16(<3 x i16>, <3 x i16>, <3 x i16>)

; Special-case: rotate a 16-bit value left/right by 8-bits is bswap.

define i16 @fshl_bswap(i16 %x) {
; CHECK-LABEL: @fshl_bswap(
; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.bswap.i16(i16 [[X:%.*]])
; CHECK-NEXT:    ret i16 [[R]]
;
  %r = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 8)
  ret i16 %r
}

define i16 @fshr_bswap(i16 %x) {
; CHECK-LABEL: @fshr_bswap(
; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.bswap.i16(i16 [[X:%.*]])
; CHECK-NEXT:    ret i16 [[R]]
;
  %r = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 8)
  ret i16 %r
}

define <3 x i16> @fshl_bswap_vector(<3 x i16> %x) {
; CHECK-LABEL: @fshl_bswap_vector(
; CHECK-NEXT:    [[R:%.*]] = call <3 x i16> @llvm.bswap.v3i16(<3 x i16> [[X:%.*]])
; CHECK-NEXT:    ret <3 x i16> [[R]]
;
  %r = call <3 x i16> @llvm.fshl.v3i16(<3 x i16> %x, <3 x i16> %x, <3 x i16> <i16 8, i16 8, i16 8>)
  ret <3 x i16> %r
}

; Negative test
; The two operands differ, so this is not a rotate.

define i16 @fshl_bswap_wrong_op(i16 %x, i16 %y) {
; CHECK-LABEL: @fshl_bswap_wrong_op(
; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[Y:%.*]], i16 8)
; CHECK-NEXT:    ret i16 [[R]]
;
  %r = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 8)
  ret i16 %r
}

; Negative test
; A rotate by 4 is not a byte swap; it is only rewritten from fshr by 4 to
; fshl by 12.

define i16 @fshr_bswap_wrong_amount(i16 %x) {
; CHECK-LABEL: @fshr_bswap_wrong_amount(
; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 12)
; CHECK-NEXT:    ret i16 [[R]]
;
  %r = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 4)
  ret i16 %r
}

; Negative test
; Rotating a 32-bit value by 8 is not a byte swap.

define i32 @fshl_bswap_wrong_width(i32 %x) {
; CHECK-LABEL: @fshl_bswap_wrong_width(
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 8)
; CHECK-NEXT:    ret i32 [[R]]
;
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 8)
  ret i32 %r
}

; Funnel shifts of masked operands: known-zero bits in the operands let the
; funnel shift collapse into a single plain shift.

; Only the high 16 bits survive the mask; rotating left by 16 moves them to the
; low half, which is a logical shift right by 16.

define i32 @fshl_mask_args_same1(i32 %a) {
; CHECK-LABEL: @fshl_mask_args_same1(
; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[A:%.*]], 16
; CHECK-NEXT:    ret i32 [[T2]]
;
  %t1 = and i32 %a, 4294901760 ; 0xffff0000
  %t2 = call i32 @llvm.fshl.i32(i32 %t1, i32 %t1, i32 16)
  ret i32 %t2
}

; The low byte rotated left by 8 lands in bits 15..8 (mask 65280 = 0xff00).

define i32 @fshl_mask_args_same2(i32 %a) {
; CHECK-LABEL: @fshl_mask_args_same2(
; CHECK-NEXT:    [[T1:%.*]] = shl i32 [[A:%.*]], 8
; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T1]], 65280
; CHECK-NEXT:    ret i32 [[T2]]
;
  %t1 = and i32 %a, 255
  %t2 = call i32 @llvm.fshl.i32(i32 %t1, i32 %t1, i32 8)
  ret i32 %t2
}

; The low byte rotated left by 24 fills the top byte exactly, so no mask is
; needed after the shl.

define i32 @fshl_mask_args_same3(i32 %a) {
; CHECK-LABEL: @fshl_mask_args_same3(
; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[A:%.*]], 24
; CHECK-NEXT:    ret i32 [[T2]]
;
  %t1 = and i32 %a, 255
  %t2 = call i32 @llvm.fshl.i32(i32 %t1, i32 %t1, i32 24)
  ret i32 %t2
}

; %t2 << 17 is always zero (its low 16 bits are clear), so only %t1 >> 15
; contributes: bits 31..24 of %a land in bits 16..9 (mask 130560 = 0x1fe00).

define i32 @fshl_mask_args_different(i32 %a) {
; CHECK-LABEL: @fshl_mask_args_different(
; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[A:%.*]], 15
; CHECK-NEXT:    [[T3:%.*]] = and i32 [[T1]], 130560
; CHECK-NEXT:    ret i32 [[T3]]
;
  %t2 = and i32 %a, 4294901760 ; 0xffff0000
  %t1 = and i32 %a, 4278190080 ; 0xff000000
  %t3 = call i32 @llvm.fshl.i32(i32 %t2, i32 %t1, i32 17)
  ret i32 %t3
}

; NOTE(review): despite the "fshr"/"same" name, this test calls fshl with two
; differently-masked operands.
; NOTE(review): 6442450943 is 0x17fffffff and does not fit in i31; presumably
; the IR parser truncates it to the all-ones i31 value, which makes %t2 equal
; to %a (consistent with the expected output) -- confirm against the parser in
; use.
; %t1 is below 1024, so %t1 >> 21 is zero and only %a << 10 remains.

define <2 x i31> @fshr_mask_args_same_vector(<2 x i31> %a) {
; CHECK-LABEL: @fshr_mask_args_same_vector(
; CHECK-NEXT:    [[T3:%.*]] = shl <2 x i31> [[A:%.*]], <i31 10, i31 10>
; CHECK-NEXT:    ret <2 x i31> [[T3]]
;
  %t1 = and <2 x i31> %a, <i31 1000, i31 1000>
  %t2 = and <2 x i31> %a, <i31 6442450943, i31 6442450943>
  %t3 = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %t2, <2 x i31> %t1, <2 x i31> <i31 10, i31 10>)
  ret <2 x i31> %t3
}

; Both mask constants have their low 3 bits clear, so rotating right by 3
; loses nothing and becomes an exact logical shift right.
; NOTE(review): %t2 is unused (and %b is an unused parameter); 6442450943 also
; exceeds the i32 range -- see the note on @fshr_mask_args_same_vector.

define <2 x i32> @fshr_mask_args_same_vector2(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @fshr_mask_args_same_vector2(
; CHECK-NEXT:    [[T1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1000000, i32 100000>
; CHECK-NEXT:    [[T3:%.*]] = lshr exact <2 x i32> [[T1]], <i32 3, i32 3>
; CHECK-NEXT:    ret <2 x i32> [[T3]]
;
  %t1 = and <2 x i32> %a, <i32 1000000, i32 100000>
  %t2 = and <2 x i32> %a, <i32 6442450943, i32 6442450943>
  %t3 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %t1, <2 x i32> %t1, <2 x i32> <i32 3, i32 3>)
  ret <2 x i32> %t3
}

; Non-splat shift amounts <10, 3>. The expected output only drops the %t2 mask
; and keeps the funnel shift, even though the test name says it is prunable.
; NOTE(review): this test also calls fshl despite the "fshr" name.

define <2 x i31> @fshr_mask_args_same_vector3_different_but_still_prunable(<2 x i31> %a) {
; CHECK-LABEL: @fshr_mask_args_same_vector3_different_but_still_prunable(
; CHECK-NEXT:    [[T1:%.*]] = and <2 x i31> [[A:%.*]], <i31 1000, i31 1000>
; CHECK-NEXT:    [[T3:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[A]], <2 x i31> [[T1]], <2 x i31> <i31 10, i31 3>)
; CHECK-NEXT:    ret <2 x i31> [[T3]]
;
  %t1 = and <2 x i31> %a, <i31 1000, i31 1000>
  %t2 = and <2 x i31> %a, <i31 6442450943, i31 6442450943>
  %t3 = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %t2, <2 x i31> %t1, <2 x i31> <i31 10, i31 3>)
  ret <2 x i31> %t3
}