; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s

; Scalar byte swaps -----------------------------------------------------------

; A 32-bit bswap is expected to become one rev on the W register.
define i32 @test_rev_w(i32 %a) nounwind {
entry:
; CHECK-LABEL: test_rev_w:
; CHECK: rev w0, w0
  %swapped = tail call i32 @llvm.bswap.i32(i32 %a)
  ret i32 %swapped
}

; A 64-bit bswap is expected to become one rev on the X register.
define i64 @test_rev_x(i64 %a) nounwind {
entry:
; CHECK-LABEL: test_rev_x:
; CHECK: rev x0, x0
  %swapped = tail call i64 @llvm.bswap.i64(i64 %a)
  ret i64 %swapped
}

declare i32 @llvm.bswap.i32(i32) nounwind readnone
declare i64 @llvm.bswap.i64(i64) nounwind readnone

; Swaps the two bytes inside each 16-bit half of %X using shifts and masks;
; the expected selection is a single rev16 on the W register.
define i32 @test_rev16_w(i32 %X) nounwind {
entry:
; CHECK-LABEL: test_rev16_w:
; CHECK: rev16 w0, w0
  %down = lshr i32 %X, 8
  %copy = bitcast i32 %X to i32
  %up = shl i32 %copy, 8
  ; 16711680 = 0x00FF0000, -16777216 = 0xFF000000
  %byte2 = and i32 %down, 16711680
  %byte3 = and i32 %up, -16777216
  ; 255 = 0x000000FF, 65280 = 0x0000FF00
  %byte0 = and i32 %down, 255
  %byte1 = and i32 %up, 65280
  %top = or i32 %byte3, %byte2
  %top_b1 = or i32 %top, %byte1
  %result = or i32 %top_b1, %byte0
  ret i32 %result
}

; A bswap followed by a 16-bit rotate right is *not* what the 64-bit REV16
; computes, so that instruction must not be selected for this pattern:
;   01234567 ->(bswap) 76543210 ->(rotr) 10765432
;   01234567 ->(rev16) 10325476
define i64 @test_rev16_x(i64 %a) nounwind {
entry:
; CHECK-LABEL: test_rev16_x:
; CHECK-NOT: rev16 x0, x0
  %swapped = tail call i64 @llvm.bswap.i64(i64 %a)
  %low = lshr i64 %swapped, 16
  %high = shl i64 %swapped, 48
  %rotated = or i64 %low, %high
  ret i64 %rotated
}

; A bswap followed by a rotate by 32 is expected to become rev32 on the
; X register.
define i64 @test_rev32_x(i64 %a) nounwind {
entry:
; CHECK-LABEL: test_rev32_x:
; CHECK: rev32 x0, x0
  %swapped = tail call i64 @llvm.bswap.i64(i64 %a)
  %low = lshr i64 %swapped, 32
  %high = shl i64 %swapped, 32
  %rotated = or i64 %low, %high
  ret i64 %rotated
}

; Vector shuffles: element order reversed inside each 64-bit group -----------

define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
; CHECK-LABEL: test_vrev64D8:
; CHECK: rev64.8b
  %vec = load <8 x i8>, <8 x i8>* %A
  %rev = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i8> %rev
}

define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
; CHECK-LABEL: test_vrev64D16:
; CHECK: rev64.4h
  %vec = load <4 x i16>, <4 x i16>* %A
  %rev = shufflevector <4 x i16> %vec, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i16> %rev
}

define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: test_vrev64D32:
; CHECK: rev64.2s
  %vec = load <2 x i32>, <2 x i32>* %A
  %rev = shufflevector <2 x i32> %vec, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x i32> %rev
}

define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
; CHECK-LABEL: test_vrev64Df:
; CHECK: rev64.2s
  %vec = load <2 x float>, <2 x float>* %A
  %rev = shufflevector <2 x float> %vec, <2 x float> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x float> %rev
}

define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
; CHECK-LABEL: test_vrev64Q8:
; CHECK: rev64.16b
  %vec = load <16 x i8>, <16 x i8>* %A
  %rev = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
  ret <16 x i8> %rev
}

define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
; CHECK-LABEL: test_vrev64Q16:
; CHECK: rev64.8h
  %vec = load <8 x i16>, <8 x i16>* %A
  %rev = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i16> %rev
}

define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
; CHECK-LABEL: test_vrev64Q32:
; CHECK: rev64.4s
  %vec = load <4 x i32>, <4 x i32>* %A
  %rev = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x i32> %rev
}

define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
; CHECK-LABEL: test_vrev64Qf:
; CHECK: rev64.4s
  %vec = load <4 x float>, <4 x float>* %A
  %rev = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x float> %rev
}

; Vector shuffles: element order reversed inside each 32-bit group -----------

define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
; CHECK-LABEL: test_vrev32D8:
; CHECK: rev32.8b
  %vec = load <8 x i8>, <8 x i8>* %A
  %rev = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i8> %rev
}

define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
; CHECK-LABEL: test_vrev32D16:
; CHECK: rev32.4h
  %vec = load <4 x i16>, <4 x i16>* %A
  %rev = shufflevector <4 x i16> %vec, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x i16> %rev
}

define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
; CHECK-LABEL: test_vrev32Q8:
; CHECK: rev32.16b
  %vec = load <16 x i8>, <16 x i8>* %A
  %rev = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
  ret <16 x i8> %rev
}

define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
; CHECK-LABEL: test_vrev32Q16:
; CHECK: rev32.8h
  %vec = load <8 x i16>, <8 x i16>* %A
  %rev = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x i16> %rev
}

; Vector shuffles: bytes swapped inside each 16-bit group ---------------------

define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
; CHECK-LABEL: test_vrev16D8:
; CHECK: rev16.8b
  %vec = load <8 x i8>, <8 x i8>* %A
  %rev = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x i8> %rev
}

define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
; CHECK-LABEL: test_vrev16Q8:
; CHECK: rev16.16b
  %vec = load <16 x i8>, <16 x i8>* %A
  %rev = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  ret <16 x i8> %rev
}

; Undef shuffle indices should not prevent matching to VREV:

; Mask lanes 1 and 2 are undef; the remaining lanes describe a full reversal
; of the eight bytes, so rev64.8b is still expected.
define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev64D8_undef:
;CHECK: rev64.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i8> %tmp2
}

; Mask lanes 0, 2 and 7 are undef; the defined lanes swap neighbouring
; halfwords, so rev32.8h is still expected.
define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev32Q16_undef:
;CHECK: rev32.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
  ret <8 x i16> %tmp2
}

; vrev <4 x i16> should use REV32 and not REV64
; NOTE(review): the directives below only look for a q-register ldr followed
; by two st1.h stores; no rev instruction is actually matched or excluded.
; The captured DEST variable is also never referenced again. Confirm whether
; stronger checks are wanted.
define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
; CHECK-LABEL: test_vrev64:
; CHECK: ldr [[DEST:q[0-9]+]],
; CHECK: st1.h
; CHECK: st1.h
entry:
  ; The <4 x i16> pointer is reinterpreted so that eight halfwords are loaded.
  %0 = bitcast <4 x i16>* %source to <8 x i16>*
  %tmp2 = load <8 x i16>, <8 x i16>* %0, align 4
  ; Lanes 6 and 5 of the loaded vector become lanes 0 and 1 of the result.
  %tmp3 = extractelement <8 x i16> %tmp2, i32 6
  %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
  %tmp9 = extractelement <8 x i16> %tmp2, i32 5
  %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1
  store <2 x i16> %tmp11, <2 x i16>* %dst, align 4
  ret void
}

; Test vrev of float4
; The function label is matched with a label directive and a trailing colon,
; like every other test in this file, so the match cannot land on an earlier
; mention of the symbol and the preceding function's checks stay confined to
; that function's output.
define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest) nounwind noinline ssp {
; CHECK-LABEL: float_vrev64:
; CHECK: ldr [[DEST:q[0-9]+]],
; CHECK: rev64.4s
entry:
  %0 = bitcast float* %source to <4 x float>*
  %tmp2 = load <4 x float>, <4 x float>* %0, align 4
  ; Mask index 7 picks lane 3 of %tmp2; index 0 picks the 0.0 constant, so the
  ; result is <0.0, %tmp2[3], 0.0, 0.0>.
  %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
  ; The result is stored to the twelfth <4 x float> slot after %dest.
  %arrayidx8 = getelementptr inbounds <4 x float>, <4 x float>* %dest, i32 11
  store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
  ret void
}


; A bswap of four 32-bit lanes is expected to become one rev32.16b, with no
; further rev-family instruction before the return.
define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
; CHECK-LABEL: test_vrev32_bswap:
; CHECK: rev32.16b
; CHECK-NOT: rev
; CHECK: ret
  %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
  ret <4 x i32> %bswap
}

declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone