; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s

define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vshlls8:
;CHECK: vshll.s8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %sext = sext <8 x i8> %tmp1 to <8 x i16>
  %shift = shl <8 x i16> %sext, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <8 x i16> %shift
}

define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vshlls16:
;CHECK: vshll.s16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %sext = sext <4 x i16> %tmp1 to <4 x i32>
  %shift = shl <4 x i32> %sext, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %shift
}

define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vshlls32:
;CHECK: vshll.s32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %sext = sext <2 x i32> %tmp1 to <2 x i64>
  %shift = shl <2 x i64> %sext, <i64 31, i64 31>
  ret <2 x i64> %shift
}

define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vshllu8:
;CHECK: vshll.u8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %zext = zext <8 x i8> %tmp1 to <8 x i16>
  %shift = shl <8 x i16> %zext, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <8 x i16> %shift
}

define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vshllu16:
;CHECK: vshll.u16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %zext = zext <4 x i16> %tmp1 to <4 x i32>
  %shift = shl <4 x i32> %zext, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %shift
}

define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vshllu32:
;CHECK: vshll.u32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %zext = zext <2 x i32> %tmp1 to <2 x i64>
  %shift = shl <2 x i64> %zext, <i64 31, i64 31>
  ret <2 x i64> %shift
}

; The following tests use the maximum shift count, so the signedness is
; irrelevant. Test both signed and unsigned versions.
define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vshlli8:
;CHECK: vshll.i8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %sext = sext <8 x i8> %tmp1 to <8 x i16>
  %shift = shl <8 x i16> %sext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %shift
}

define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vshlli16:
;CHECK: vshll.i16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %zext = zext <4 x i16> %tmp1 to <4 x i32>
  %shift = shl <4 x i32> %zext, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %shift
}

define <2 x i64> @vshlli32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vshlli32:
;CHECK: vshll.i32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %zext = zext <2 x i32> %tmp1 to <2 x i64>
  %shift = shl <2 x i64> %zext, <i64 32, i64 32>
  ret <2 x i64> %shift
}

; And these have a shift just out of range so separate vmovl and vshl
; instructions are needed.
define <8 x i16> @vshllu8_bad(<8 x i8>* %A) nounwind {
; CHECK-LABEL: vshllu8_bad:
; CHECK: vmovl.u8
; CHECK: vshl.i16
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %zext = zext <8 x i8> %tmp1 to <8 x i16>
  %shift = shl <8 x i16> %zext, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
  ret <8 x i16> %shift
}

define <4 x i32> @vshlls16_bad(<4 x i16>* %A) nounwind {
; CHECK-LABEL: vshlls16_bad:
; CHECK: vmovl.s16
; CHECK: vshl.i32
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %sext = sext <4 x i16> %tmp1 to <4 x i32>
  %shift = shl <4 x i32> %sext, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %shift
}

define <2 x i64> @vshllu32_bad(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vshllu32_bad:
; CHECK: vmovl.u32
; CHECK: vshl.i64
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %zext = zext <2 x i32> %tmp1 to <2 x i64>
  %shift = shl <2 x i64> %zext, <i64 33, i64 33>
  ret <2 x i64> %shift
}