; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s

; Scalar 32-bit byte swap: a call to @llvm.bswap.i32 on the argument is
; expected to be selected as a single `rev` on the w register.
define i32 @test_rev_w(i32 %a) nounwind {
entry:
; CHECK-LABEL: test_rev_w:
; CHECK: rev w0, w0
  %0 = tail call i32 @llvm.bswap.i32(i32 %a)
  ret i32 %0
}

; Scalar 64-bit byte swap: a call to @llvm.bswap.i64 on the argument is
; expected to be selected as a single `rev` on the x register.
define i64 @test_rev_x(i64 %a) nounwind {
entry:
; CHECK-LABEL: test_rev_x:
; CHECK: rev x0, x0
  %0 = tail call i64 @llvm.bswap.i64(i64 %a)
  ret i64 %0
}

; Byte-swap intrinsics used by the scalar tests in this file.
declare i32 @llvm.bswap.i32(i32) nounwind readnone
declare i64 @llvm.bswap.i64(i64) nounwind readnone

; Open-coded byte swap inside each 16-bit half of a 32-bit value
; (bytes b3 b2 b1 b0 -> b2 b3 b0 b1), built from shifts, masks and ors.
; The whole pattern is expected to be selected as one `rev16` on the w register.
define i32 @test_rev16_w(i32 %X) nounwind {
entry:
; CHECK-LABEL: test_rev16_w:
; CHECK: rev16 w0, w0
  ; %tmp1 = X >> 8, %tmp4 = X << 8 (the bitcast is an i32 -> i32 no-op).
  %tmp1 = lshr i32 %X, 8
  %X15 = bitcast i32 %X to i32
  %tmp4 = shl i32 %X15, 8
  ; 16711680 = 0x00FF0000: original byte 3 moved down into byte 2.
  %tmp2 = and i32 %tmp1, 16711680
  ; -16777216 = 0xFF000000: original byte 2 moved up into byte 3.
  %tmp5 = and i32 %tmp4, -16777216
  ; 255 = 0x000000FF: original byte 1 moved down into byte 0.
  %tmp9 = and i32 %tmp1, 255
  ; 65280 = 0x0000FF00: original byte 0 moved up into byte 1.
  %tmp13 = and i32 %tmp4, 65280
  ; Combine the four isolated bytes.
  %tmp6 = or i32 %tmp5, %tmp2
  %tmp10 = or i32 %tmp6, %tmp13
  %tmp14 = or i32 %tmp10, %tmp9
  ret i32 %tmp14
}

; 64-bit REV16 is *not* a swap then a 16-bit rotation:
;   01234567 ->(bswap) 76543210 ->(rotr) 10765432
;   01234567 ->(rev16) 10325476
; Negative test: the IR below is bswap followed by a rotate right by 16
; (lshr 16 | shl 48), so a `rev16` on x0 must not be emitted for it.
define i64 @test_rev16_x(i64 %a) nounwind {
entry:
; CHECK-LABEL: test_rev16_x:
; CHECK-NOT: rev16 x0, x0
  %0 = tail call i64 @llvm.bswap.i64(i64 %a)
  %1 = lshr i64 %0, 16
  %2 = shl i64 %0, 48
  %3 = or i64 %1, %2
  ret i64 %3
}

; A 64-bit bswap followed by a rotate by 32 (lshr 32 | shl 32) swaps the
; bytes within each 32-bit half; it is expected to be selected as one
; `rev32` on the x register.
define i64 @test_rev32_x(i64 %a) nounwind {
entry:
; CHECK-LABEL: test_rev32_x:
; CHECK: rev32 x0, x0
  %0 = tail call i64 @llvm.bswap.i64(i64 %a)
  %1 = lshr i64 %0, 32
  %2 = shl i64 %0, 32
  %3 = or i64 %1, %2
  ret i64 %3
}

; Reversing all eight i8 lanes of a 64-bit vector is expected to select
; rev64.8b.
define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev64D8:
;CHECK: rev64.8b
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
	ret <8 x i8> %tmp2
}

; Reversing all four i16 lanes of a 64-bit vector is expected to select
; rev64.4h.
define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev64D16:
;CHECK: rev64.4h
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
	ret <4 x i16> %tmp2
}

; Swapping the two i32 lanes of a 64-bit vector is expected to select
; rev64.2s.
define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: test_vrev64D32:
;CHECK: rev64.2s
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
	ret <2 x i32> %tmp2
}

; Floating-point variant of the two-lane swap: swapping the two float lanes
; of a 64-bit vector is expected to select rev64.2s.
define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
;CHECK-LABEL: test_vrev64Df:
;CHECK: rev64.2s
	%tmp1 = load <2 x float>, <2 x float>* %A
	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
	ret <2 x float> %tmp2
}

; Reversing the i8 lanes within each group of eight (each 64-bit half) of a
; 128-bit vector is expected to select rev64.16b.
define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev64Q8:
;CHECK: rev64.16b
	%tmp1 = load <16 x i8>, <16 x i8>* %A
	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
	ret <16 x i8> %tmp2
}

; Reversing the i16 lanes within each group of four (each 64-bit half) of a
; 128-bit vector is expected to select rev64.8h.
define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev64Q16:
;CHECK: rev64.8h
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
	ret <8 x i16> %tmp2
}

; Swapping the i32 lanes within each pair (each 64-bit half) of a 128-bit
; vector is expected to select rev64.4s.
define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: test_vrev64Q32:
;CHECK: rev64.4s
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	ret <4 x i32> %tmp2
}

; Floating-point variant: swapping the float lanes within each pair (each
; 64-bit half) of a 128-bit vector is expected to select rev64.4s.
define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
;CHECK-LABEL: test_vrev64Qf:
;CHECK: rev64.4s
	%tmp1 = load <4 x float>, <4 x float>* %A
	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	ret <4 x float> %tmp2
}

; Reversing the i8 lanes within each group of four (each 32-bit word) of a
; 64-bit vector is expected to select rev32.8b.
define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev32D8:
;CHECK: rev32.8b
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
	ret <8 x i8> %tmp2
}

; Swapping the i16 lanes within each pair (each 32-bit word) of a 64-bit
; vector is expected to select rev32.4h.
define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev32D16:
;CHECK: rev32.4h
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
	ret <4 x i16> %tmp2
}

; Reversing the i8 lanes within each group of four (each 32-bit word) of a
; 128-bit vector is expected to select rev32.16b.
define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev32Q8:
;CHECK: rev32.16b
	%tmp1 = load <16 x i8>, <16 x i8>* %A
	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
	ret <16 x i8> %tmp2
}

; Swapping the i16 lanes within each pair (each 32-bit word) of a 128-bit
; vector is expected to select rev32.8h.
define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev32Q16:
;CHECK: rev32.8h
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
	ret <8 x i16> %tmp2
}

; Swapping the i8 lanes within each pair (each 16-bit halfword) of a 64-bit
; vector is expected to select rev16.8b.
define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev16D8:
;CHECK: rev16.8b
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
	ret <8 x i8> %tmp2
}

; Swapping the i8 lanes within each pair (each 16-bit halfword) of a 128-bit
; vector is expected to select rev16.16b.
define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev16Q8:
;CHECK: rev16.16b
	%tmp1 = load <16 x i8>, <16 x i8>* %A
	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
	ret <16 x i8> %tmp2
}

; Undef shuffle indices should not prevent matching to VREV:

; Full eight-lane i8 reversal mask with lanes 1 and 2 left undef; the
; remaining defined indices still describe a reversal, so rev64.8b is
; expected.
define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev64D8_undef:
;CHECK: rev64.8b
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
	ret <8 x i8> %tmp2
}

; Pairwise i16 swap mask with lanes 0, 2 and 7 left undef; the remaining
; defined indices still describe a swap within each 32-bit word, so
; rev32.8h is expected.
define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev32Q16_undef:
;CHECK: rev32.8h
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
	ret <8 x i16> %tmp2
}

; vrev <4 x i16> should use REV32 and not REV64
; The body loads an <8 x i16> through a bitcast of %source, extracts lanes 6
; and 5, packs them (lane 6 first) into a <2 x i16> and stores it to %dst.
; The checks expect a q-register load followed by two st1.h stores.
; NOTE(review): despite the heading, no rev instruction is asserted or
; forbidden here, and the DEST capture is never reused - confirm intent.
define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
; CHECK-LABEL: test_vrev64:
; CHECK: ldr [[DEST:q[0-9]+]],
; CHECK: st1.h
; CHECK: st1.h
entry:
  %0 = bitcast <4 x i16>* %source to <8 x i16>*
  %tmp2 = load <8 x i16>, <8 x i16>* %0, align 4
  %tmp3 = extractelement <8 x i16> %tmp2, i32 6
  %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
  %tmp9 = extractelement <8 x i16> %tmp2, i32 5
  %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1
  store <2 x i16> %tmp11, <2 x i16>* %dst, align 4
  ret void
}

; Test vrev of float4
; The body loads a <4 x float> through a bitcast of %source, shuffles it with
; a constant vector whose lane 0 is 0.0 (mask <0, 7, 0, 0>: lane 1 takes
; element 3 of the loaded vector, every other lane takes the constant 0.0),
; and stores the result at vector index 11 from %dest.
; The checks expect a q-register load and a rev64.4s.
; The function-name check uses the label form with a trailing colon, like
; every other test in this file, so it anchors on the function's own label.
define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest) nounwind noinline ssp {
; CHECK-LABEL: float_vrev64:
; CHECK: ldr [[DEST:q[0-9]+]],
; CHECK: rev64.4s
entry:
  %0 = bitcast float* %source to <4 x float>*
  %tmp2 = load <4 x float>, <4 x float>* %0, align 4
  %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
  %arrayidx8 = getelementptr inbounds <4 x float>, <4 x float>* %dest, i32 11
  store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
  ret void
}


; Vector byte swap: @llvm.bswap.v4i32 is expected to be selected as exactly
; one rev32.16b, with no further rev-family instruction before the return.
define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
; CHECK-LABEL: test_vrev32_bswap:
; CHECK: rev32.16b
; CHECK-NOT: rev
; CHECK: ret
  %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
  ret <4 x i32> %bswap
}

; Vector byte-swap intrinsic used by test_vrev32_bswap.
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
