; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; When first 2 operands match, it's a rotate.

define i8 @rotl_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotl_i8_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 3, 27
; CHECK-NEXT:    rlwimi 4, 3, 3, 0, 28
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i64 @rotl_i64_const_shift(i64 %x) {
; CHECK32-LABEL: rotl_i64_const_shift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 5, 4, 3
; CHECK32-NEXT:    rotlwi 6, 3, 3
; CHECK32-NEXT:    rlwimi 5, 3, 3, 0, 28
; CHECK32-NEXT:    rlwimi 6, 4, 3, 0, 28
; CHECK32-NEXT:    mr 3, 5
; CHECK32-NEXT:    mr 4, 6
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotl_i64_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 3, 3, 3
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}

; When first 2 operands match, it's a rotate (by variable amount).

define i16 @rotl_i16(i16 %x, i16 %z) {
; CHECK32-LABEL: rotl_i16:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 6, 4, 28
; CHECK32-NEXT:    neg 4, 4
; CHECK32-NEXT:    clrlwi 5, 3, 16
; CHECK32-NEXT:    clrlwi 4, 4, 28
; CHECK32-NEXT:    slw 3, 3, 6
; CHECK32-NEXT:    srw 4, 5, 4
; CHECK32-NEXT:    or 3, 3, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotl_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 5, 4
; CHECK64-NEXT:    clrlwi 6, 3, 16
; CHECK64-NEXT:    clrlwi 4, 4, 28
; CHECK64-NEXT:    clrlwi 5, 5, 28
; CHECK64-NEXT:    slw 3, 3, 4
; CHECK64-NEXT:    srw 4, 6, 5
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i32 @rotl_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotl_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlw 3, 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK32_32-LABEL: rotl_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    clrlwi 5, 6, 26
; CHECK32_32-NEXT:    subfic 8, 5, 32
; CHECK32_32-NEXT:    neg 6, 6
; CHECK32_32-NEXT:    slw 7, 3, 5
; CHECK32_32-NEXT:    addi 9, 5, -32
; CHECK32_32-NEXT:    srw 8, 4, 8
; CHECK32_32-NEXT:    clrlwi 6, 6, 26
; CHECK32_32-NEXT:    slw 9, 4, 9
; CHECK32_32-NEXT:    or 7, 7, 8
; CHECK32_32-NEXT:    subfic 8, 6, 32
; CHECK32_32-NEXT:    or 7, 7, 9
; CHECK32_32-NEXT:    addi 9, 6, -32
; CHECK32_32-NEXT:    slw 8, 3, 8
; CHECK32_32-NEXT:    srw 9, 3, 9
; CHECK32_32-NEXT:    srw 3, 3, 6
; CHECK32_32-NEXT:    srw 6, 4, 6
; CHECK32_32-NEXT:    or 6, 6, 8
; CHECK32_32-NEXT:    or 6, 6, 9
; CHECK32_32-NEXT:    slw 4, 4, 5
; CHECK32_32-NEXT:    or 3, 7, 3
; CHECK32_32-NEXT:    or 4, 4, 6
; CHECK32_32-NEXT:    blr
;
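; Note: both 32-bit configurations keep an i64 in a pair of 32-bit registers,
; so the i64 rotate is expanded into per-word shifts and ors; the 64-bit
; configuration lowers it to a single rotld.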
; CHECK32_64-LABEL: rotl_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    clrlwi 5, 6, 26
; CHECK32_64-NEXT:    neg 6, 6
; CHECK32_64-NEXT:    subfic 8, 5, 32
; CHECK32_64-NEXT:    slw 7, 3, 5
; CHECK32_64-NEXT:    clrlwi 6, 6, 26
; CHECK32_64-NEXT:    srw 8, 4, 8
; CHECK32_64-NEXT:    addi 9, 5, -32
; CHECK32_64-NEXT:    or 7, 7, 8
; CHECK32_64-NEXT:    subfic 8, 6, 32
; CHECK32_64-NEXT:    slw 5, 4, 5
; CHECK32_64-NEXT:    slw 9, 4, 9
; CHECK32_64-NEXT:    srw 10, 3, 6
; CHECK32_64-NEXT:    srw 4, 4, 6
; CHECK32_64-NEXT:    addi 6, 6, -32
; CHECK32_64-NEXT:    slw 8, 3, 8
; CHECK32_64-NEXT:    srw 3, 3, 6
; CHECK32_64-NEXT:    or 4, 4, 8
; CHECK32_64-NEXT:    or 6, 7, 9
; CHECK32_64-NEXT:    or 4, 4, 3
; CHECK32_64-NEXT:    or 3, 6, 10
; CHECK32_64-NEXT:    or 4, 5, 4
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotld 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.

define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK32_32-LABEL: rotl_v4i32:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlw 3, 3, 7
; CHECK32_32-NEXT:    rotlw 4, 4, 8
; CHECK32_32-NEXT:    rotlw 5, 5, 9
; CHECK32_32-NEXT:    rotlw 6, 6, 10
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_v4i32:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_v4i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.

define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) {
; CHECK32_32-LABEL: rotl_v4i32_const_shift:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlwi 3, 3, 3
; CHECK32_32-NEXT:    rotlwi 4, 4, 3
; CHECK32_32-NEXT:    rotlwi 5, 5, 3
; CHECK32_32-NEXT:    rotlwi 6, 6, 3
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_v4i32_const_shift:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vspltisw 3, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_v4i32_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vspltisw 3, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; Repeat everything for funnel shift right.

define i8 @rotr_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotr_i8_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 3, 29
; CHECK-NEXT:    rlwimi 4, 3, 5, 0, 26
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i32 @rotr_i32_const_shift(i32 %x) {
; CHECK-LABEL: rotr_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 3, 3, 29
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}

; When first 2 operands match, it's a rotate (by variable amount).
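; fshr with matching first operands is a rotate-right by the amount modulo the
; bitwidth. For example (illustrative only, not a checked test):
;   %r = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
; rotates %x right by 3, e.g. 0b10010110 becomes 0b11010010.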

define i16 @rotr_i16(i16 %x, i16 %z) {
; CHECK32-LABEL: rotr_i16:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 6, 4, 28
; CHECK32-NEXT:    neg 4, 4
; CHECK32-NEXT:    clrlwi 5, 3, 16
; CHECK32-NEXT:    clrlwi 4, 4, 28
; CHECK32-NEXT:    srw 5, 5, 6
; CHECK32-NEXT:    slw 3, 3, 4
; CHECK32-NEXT:    or 3, 5, 3
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotr_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 5, 4
; CHECK64-NEXT:    clrlwi 6, 3, 16
; CHECK64-NEXT:    clrlwi 4, 4, 28
; CHECK64-NEXT:    clrlwi 5, 5, 28
; CHECK64-NEXT:    srw 4, 6, 4
; CHECK64-NEXT:    slw 3, 3, 5
; CHECK64-NEXT:    or 3, 4, 3
; CHECK64-NEXT:    blr
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i32 @rotr_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotr_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    neg 4, 4
; CHECK-NEXT:    rotlw 3, 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK32_32-LABEL: rotr_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    clrlwi 5, 6, 26
; CHECK32_32-NEXT:    subfic 8, 5, 32
; CHECK32_32-NEXT:    neg 6, 6
; CHECK32_32-NEXT:    srw 7, 4, 5
; CHECK32_32-NEXT:    addi 9, 5, -32
; CHECK32_32-NEXT:    slw 8, 3, 8
; CHECK32_32-NEXT:    clrlwi 6, 6, 26
; CHECK32_32-NEXT:    srw 9, 3, 9
; CHECK32_32-NEXT:    or 7, 7, 8
; CHECK32_32-NEXT:    subfic 8, 6, 32
; CHECK32_32-NEXT:    or 7, 7, 9
; CHECK32_32-NEXT:    addi 9, 6, -32
; CHECK32_32-NEXT:    srw 8, 4, 8
; CHECK32_32-NEXT:    slw 9, 4, 9
; CHECK32_32-NEXT:    slw 4, 4, 6
; CHECK32_32-NEXT:    slw 6, 3, 6
; CHECK32_32-NEXT:    or 6, 6, 8
; CHECK32_32-NEXT:    or 6, 6, 9
; CHECK32_32-NEXT:    srw 3, 3, 5
; CHECK32_32-NEXT:    or 4, 7, 4
; CHECK32_32-NEXT:    or 3, 3, 6
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    clrlwi 5, 6, 26
; CHECK32_64-NEXT:    neg 6, 6
; CHECK32_64-NEXT:    subfic 8, 5, 32
; CHECK32_64-NEXT:    srw 7, 4, 5
; CHECK32_64-NEXT:    clrlwi 6, 6, 26
; CHECK32_64-NEXT:    slw 8, 3, 8
; CHECK32_64-NEXT:    addi 9, 5, -32
; CHECK32_64-NEXT:    or 7, 7, 8
; CHECK32_64-NEXT:    subfic 8, 6, 32
; CHECK32_64-NEXT:    srw 5, 3, 5
; CHECK32_64-NEXT:    srw 9, 3, 9
; CHECK32_64-NEXT:    slw 10, 4, 6
; CHECK32_64-NEXT:    slw 3, 3, 6
; CHECK32_64-NEXT:    addi 6, 6, -32
; CHECK32_64-NEXT:    srw 8, 4, 8
; CHECK32_64-NEXT:    slw 4, 4, 6
; CHECK32_64-NEXT:    or 3, 3, 8
; CHECK32_64-NEXT:    or 6, 7, 9
; CHECK32_64-NEXT:    or 3, 3, 4
; CHECK32_64-NEXT:    or 4, 6, 10
; CHECK32_64-NEXT:    or 3, 5, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 4, 4
; CHECK64-NEXT:    rotld 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.
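; There is no vector rotate-right instruction, so the expected lowering is to
; negate the rotate amount and reuse rotate-left: rotlw per element on the
; GPR-only configuration, and a subtract-from-zero (vsubuwm) feeding vrlw on
; the Altivec/VSX configurations.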

define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK32_32-LABEL: rotr_v4i32:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    neg 7, 7
; CHECK32_32-NEXT:    neg 8, 8
; CHECK32_32-NEXT:    neg 9, 9
; CHECK32_32-NEXT:    neg 10, 10
; CHECK32_32-NEXT:    rotlw 3, 3, 7
; CHECK32_32-NEXT:    rotlw 4, 4, 8
; CHECK32_32-NEXT:    rotlw 5, 5, 9
; CHECK32_32-NEXT:    rotlw 6, 6, 10
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_v4i32:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vxor 4, 4, 4
; CHECK32_64-NEXT:    vsubuwm 3, 4, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_v4i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    xxlxor 36, 36, 36
; CHECK64-NEXT:    vsubuwm 3, 4, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.

define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
; CHECK32_32-LABEL: rotr_v4i32_const_shift:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlwi 3, 3, 29
; CHECK32_32-NEXT:    rotlwi 4, 4, 29
; CHECK32_32-NEXT:    rotlwi 5, 5, 29
; CHECK32_32-NEXT:    rotlwi 6, 6, 29
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_v4i32_const_shift:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vspltisw 3, -16
; CHECK32_64-NEXT:    vspltisw 4, 13
; CHECK32_64-NEXT:    vsubuwm 3, 4, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_v4i32_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vspltisw 3, -16
; CHECK64-NEXT:    vspltisw 4, 13
; CHECK64-NEXT:    vsubuwm 3, 4, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
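; The *_shift_by_bitwidth tests above fold to a bare blr because the funnel
; shift amount is taken modulo the bitwidth: shifting by 32 on i32 elements is
; a rotate by 0, i.e. the identity when both inputs are the same value.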