; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

; Funnel-shift intrinsic declarations (left variant) at each tested width.
declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; Funnel-shift intrinsic declarations (right variant) at each tested width.
declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables.

; Expansion: (x << (z % 32)) | ((y >> 1) >> (~z % 32)); the pre-shift of y
; by 1 plus the inverted amount avoids an undefined shift by 32 when z == 0.
define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshl_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT: mvn w9, w2
; CHECK-NEXT: lsr w10, w1, #1
; CHECK-NEXT: lsl w8, w0, w2
; CHECK-NEXT: lsr w9, w10, w9
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Same expansion at 64 bits, using the x registers.
define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
; CHECK-LABEL: fshl_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn w9, w2
; CHECK-NEXT: lsr x10, x1, #1
; CHECK-NEXT: lsl x8, x0, x2
; CHECK-NEXT: lsr x9, x10, x9
; CHECK-NEXT: orr x0, x8, x9
; CHECK-NEXT: ret
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

; Verify that weird types are minimally supported.
; The i37 shift amount must be reduced modulo 37; the movk sequence builds the
; magic-number reciprocal of 37 and umulh/msub compute the remainder.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshl_i37:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #31883
; CHECK-NEXT: movk x8, #3542, lsl #16
; CHECK-NEXT: movk x8, #51366, lsl #32
; CHECK-NEXT: movk x8, #56679, lsl #48
; CHECK-NEXT: umulh x8, x2, x8
; CHECK-NEXT: mov w9, #37
; CHECK-NEXT: ubfx x8, x8, #5, #27
; CHECK-NEXT: msub w8, w8, w9, w2
; CHECK-NEXT: lsl x9, x0, x8
; CHECK-NEXT: mvn w8, w8
; CHECK-NEXT: ubfiz x10, x1, #26, #37
; CHECK-NEXT: lsr x8, x10, x8
; CHECK-NEXT: orr x0, x9, x8
; CHECK-NEXT: ret
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #67
; CHECK-NEXT: ret
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; Shift amounts >= bitwidth wrap: 15 % 8 == 7, so (255 << 7) | (0 >> 1) == 128.
define i8 @fshl_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #128
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)
  ret i8 %f
}

; 11 % 8 == 3: (15 << 3) | (15 >> 5) == 0x78 == 120.
define i8 @fshl_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #120
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11)
  ret i8 %f
}

; 8 % 8 == 0: a zero funnel-shift-left returns the first operand unchanged.
define i8 @fshl_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_3:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8)
  ret i8 %f
}

; With constant shift amount, this is 'extr'.

; fshl by a constant 9 is extr with lsb index 32 - 9 == 23.
define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w0, w1, #23
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount.

; 41 % 32 == 9, so this folds to the same extr as above.
define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w0, w1, #23
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work.

; 105 % 64 == 41, so lsb index is 64 - 41 == 23.
define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshl_i64_const_overshift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x0, x0, x1, #23
; CHECK-NEXT: ret
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

; fshl(255, 0, 7) == (255 << 7) & 0xff == 128, folded at compile time.
define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #128
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables.

; Expansion mirrors fshl: (y >> (z % 32)) | ((x << 1) << (~z % 32)).
define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshr_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT: mvn w9, w2
; CHECK-NEXT: lsl w10, w0, #1
; CHECK-NEXT: lsr w8, w1, w2
; CHECK-NEXT: lsl w9, w10, w9
; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Same expansion at 64 bits.
define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
; CHECK-LABEL: fshr_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn w9, w2
; CHECK-NEXT: lsl x10, x0, #1
; CHECK-NEXT: lsr x8, x1, x2
; CHECK-NEXT: lsl x9, x10, x9
; CHECK-NEXT: orr x0, x9, x8
; CHECK-NEXT: ret
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

; Verify that weird types are minimally supported.
; As with fshl_i37, the amount is reduced modulo 37 via a magic-number
; reciprocal; the +27 adjusts for the i37 value being stored in the top of
; a 64-bit register.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshr_i37:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #31883
; CHECK-NEXT: movk x8, #3542, lsl #16
; CHECK-NEXT: movk x8, #51366, lsl #32
; CHECK-NEXT: movk x8, #56679, lsl #48
; CHECK-NEXT: umulh x8, x2, x8
; CHECK-NEXT: mov w9, #37
; CHECK-NEXT: lsr x8, x8, #5
; CHECK-NEXT: msub w8, w8, w9, w2
; CHECK-NEXT: lsl x10, x1, #27
; CHECK-NEXT: add w8, w8, #27 // =27
; CHECK-NEXT: lsr x9, x10, x8
; CHECK-NEXT: mvn w8, w8
; CHECK-NEXT: lsl x10, x0, #1
; CHECK-NEXT: lsl x8, x10, x8
; CHECK-NEXT: orr x0, x8, x9
; CHECK-NEXT: ret
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #31
; CHECK-NEXT: ret
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; 15 % 8 == 7: (0 >> 7) | (255 << 1) == 0xfe == 254.
define i8 @fshr_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #254
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)
  ret i8 %f
}

; 11 % 8 == 3: (15 >> 3) | (15 << 5) == 0xe1 == 225.
define i8 @fshr_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #225
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)
  ret i8 %f
}

; 8 % 8 == 0: a zero funnel-shift-right returns the second operand unchanged.
define i8 @fshr_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #255
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
  ret i8 %f
}

; With constant shift amount, this is 'extr'.

; fshr uses the amount directly as the extr lsb index.
define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w0, w1, #9
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w0, w1, #9
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_i64_const_overshift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x0, x0, x1, #41
; CHECK-NEXT: ret
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

; fshr(255, 0, 7) == (0 >> 7) | (255 << 1) == 254, folded at compile time.
define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #254
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Shift by the full bitwidth is equivalent to a shift by 0:
; fshl returns %x untouched (already in w0), so no code is needed.
define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

; fshr by the bitwidth returns %y, so only a register move remains.
define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, w1
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

; Vector version of the same fold: fshl by the bitwidth returns %x (v0).
define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

; Vector fshr by the bitwidth returns %y, so a full-register move remains.
define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK: // %bb.0:
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
