; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
; RUN: llc -mtriple=armeb-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE

; Codegen test for joining two 64-bit NEON vectors into one 128-bit vector
; (and for splitting a 128-bit vector back into a half) via shufflevector.
;
; The file is run twice: once for a little-endian triple (arm-eabi) and once
; for a big-endian triple (armeb-eabi), both with the soft-float ABI. Checks
; shared by both runs use the common prefix; endian-specific expectations use
; the LE / BE prefixes. The expected vmov lines move the result into r0-r3,
; with the order of each core-register pair swapped between the two runs.
;
; NOTE(review): for the i8/i16/i32/float cases the big-endian checks name
; d16/d17 directly instead of reusing the captured load registers, while the
; i64 case reuses the captures. Presumably the big-endian lowering moves the
; value through another register before the vmov - confirm before changing.

; Concatenate two <8 x i8> values: the mask selects elements 0-15, i.e. all of
; %tmp1 followed by all of %tmp2.
define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vcombine8
; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]

; CHECK-LE-DAG: vmov r0, r1, [[LD0]]
; CHECK-LE-DAG: vmov r2, r3, [[LD1]]

; CHECK-BE-DAG: vmov r1, r0, d16
; CHECK-BE-DAG: vmov r3, r2, d17
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp3
}

; Concatenate two <4 x i16> values (mask selects elements 0-7 in order).
define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vcombine16
; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]

; CHECK-LE-DAG: vmov r0, r1, [[LD0]]
; CHECK-LE-DAG: vmov r2, r3, [[LD1]]

; CHECK-BE-DAG: vmov r1, r0, d16
; CHECK-BE-DAG: vmov r3, r2, d17
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp3
}

; Concatenate two <2 x i32> values (mask selects elements 0-3 in order).
; Unlike the i8/i16 cases, the endian-specific vmov checks here are ordered
; (plain checks rather than -DAG checks).
define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: vcombine32

; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]

; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-LE: vmov r2, r3, [[LD1]]

; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp3
}

; Concatenate two <2 x float> values (mask selects elements 0-3 in order).
define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: vcombinefloat

; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]

; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-LE: vmov r2, r3, [[LD1]]

; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %tmp3
}

; Concatenate two <1 x i64> values (mask selects elements 0-1 in order).
; Here both endiannesses expect the vmov to read the loaded registers directly.
define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
; CHECK-LABEL: vcombine64
; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]

; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-LE: vmov r2, r3, [[LD1]]

; CHECK-BE: vmov r1, r0, [[LD0]]
; CHECK-BE: vmov r3, r2, [[LD1]]
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = load <1 x i64>, <1 x i64>* %B
  %tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %tmp3
}

; Check for vget_low and vget_high implemented with shufflevector. PR8411.
; They should not require storing to the stack.

; Extract the low half of an <8 x i16>: the mask selects elements 0-3.
define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind {
; CHECK-LABEL: vget_low16
; CHECK-NOT: vst
; CHECK-LE: vmov r0, r1, d16
; CHECK-BE: vmov r1, r0, d16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i16> %tmp2
}

; Extract the high half of a <16 x i8>: the mask selects elements 8-15.
define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind {
; CHECK-LABEL: vget_high8
; CHECK-NOT: vst
; CHECK-LE: vmov r0, r1, d17
; CHECK-BE: vmov r1, r0, d16
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %tmp2
}