; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Each vcombine* function loads two 64-bit vectors and joins them into one
; 128-bit vector with an identity shuffle mask: all lanes of the first
; operand followed by all lanes of the second. The expected output moves
; d16 into r0/r1 and d17 into r2/r3.

; Joins two <8 x i8> vectors into a <16 x i8>.
define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK: vcombine8
; CHECK: vmov r0, r1, d16
; CHECK: vmov r2, r3, d17
  %lo = load <8 x i8>* %A
  %hi = load <8 x i8>* %B
  %joined = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %joined
}

; Joins two <4 x i16> vectors into an <8 x i16>.
define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK: vcombine16
; CHECK: vmov r0, r1, d16
; CHECK: vmov r2, r3, d17
  %lo = load <4 x i16>* %A
  %hi = load <4 x i16>* %B
  %joined = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %joined
}

; Joins two <2 x i32> vectors into a <4 x i32>.
define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK: vcombine32
; CHECK: vmov r0, r1, d16
; CHECK: vmov r2, r3, d17
  %lo = load <2 x i32>* %A
  %hi = load <2 x i32>* %B
  %joined = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %joined
}

; Joins two <2 x float> vectors into a <4 x float>.
define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK: vcombinefloat
; CHECK: vmov r0, r1, d16
; CHECK: vmov r2, r3, d17
  %lo = load <2 x float>* %A
  %hi = load <2 x float>* %B
  %joined = shufflevector <2 x float> %lo, <2 x float> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %joined
}

; Joins two <1 x i64> vectors into a <2 x i64>.
define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
; CHECK: vcombine64
; CHECK: vmov r0, r1, d16
; CHECK: vmov r2, r3, d17
  %lo = load <1 x i64>* %A
  %hi = load <1 x i64>* %B
  %joined = shufflevector <1 x i64> %lo, <1 x i64> %hi, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %joined
}

; Check for vget_low and vget_high implemented with shufflevector. PR8411.
; They should not require storing to the stack.
; Selects lanes 0-3 of a loaded <8 x i16>. The expected output contains no
; vst between the function label and the vmov that moves d16 into r0/r1.
define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind {
; CHECK: vget_low16
; CHECK-NOT: vst
; CHECK: vmov r0, r1, d16
  %quad = load <8 x i16>* %A
  %lowhalf = shufflevector <8 x i16> %quad, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i16> %lowhalf
}

; Selects lanes 8-15 of a loaded <16 x i8>. The expected output contains no
; vst between the function label and the vmov that moves d17 into r0/r1.
define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind {
; CHECK: vget_high8
; CHECK-NOT: vst
; CHECK: vmov r0, r1, d17
  %quad = load <16 x i8>* %A
  %highhalf = shufflevector <16 x i8> %quad, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %highhalf
}