• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
2; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
3
4; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
5; WARN-NOT: warning
6
7; Most of these tests create a vector tuple, insert z5 into one of the elements,
8; and finally extract that element from the wide vector to return it.  Unless a
9; test says otherwise, the checks ensure that z5 is the value that is returned.
10
11;
12; Insert into two element tuples
13;
14
15; tuple:      { tuple2.res0, tuple2.res1 }
16; insert z5:  {     z5     , tuple2.res1 }
17; extract z5:       ^^
; Builds a two-element tuple from %z0 and %z1, overwrites element 0 with %z5,
; then reads element 0 back and returns it.  The expected output is a single
; move of z5 into z0 followed by the return.
; NOTE(review): %z2-%z4 are unused in the body; presumably they only pad the
; argument list so that %z5 is passed in register z5 -- confirm.
18define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
19                                                   <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
20                                                   <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
21  ; CHECK-LABEL: set_tuple2_nxv8i32_elt0:
22  ; CHECK-NEXT:  mov     z0.d, z5.d
23  ; CHECK-NEXT:  ret
  ; create2(%z0, %z1) -> set element 0 to %z5 -> get element 0
24  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
25  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
26  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
27  ret <vscale x 4 x i32> %ext
28}
29
30; tuple:       { tuple2.res0, tuple2.res1 }
31; insert z5:   { tuple2.res0,     z5      }
32; extract z5:                     ^^
; Builds a two-element tuple from %z0 and %z1, overwrites element 1 with %z5,
; then reads element 1 back and returns it.  The expected output is a single
; move of z5 into z0 followed by the return.
; NOTE(review): %z2-%z4 are unused in the body; presumably they only pad the
; argument list so that %z5 is passed in register z5 -- confirm.
33define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
34                                                   <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
35                                                   <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
36  ; CHECK-LABEL: set_tuple2_nxv8i32_elt1:
37  ; CHECK-NEXT:  mov     z0.d, z5.d
38  ; CHECK-NEXT:  ret
  ; create2(%z0, %z1) -> set element 1 to %z5 -> get element 1
39  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
40  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
41  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 1)
42  ret <vscale x 4 x i32> %ext
43}
44
45; This test checks the elements _not_ being set aren't changed.
46
47; tuple:       { tuple2.res0, tuple2.res1 }
48; insert z5:   { tuple2.res0,     z5      }
49; extract z0:         ^^
; Builds a two-element tuple from %z0 and %z1, overwrites element 1 with %z5,
; but returns element 0, which was created from %z0.  The expected output is
; the return instruction alone, with no move before it.
; NOTE(review): no move is expected presumably because %z0 already sits in the
; return register z0 -- confirm against the calling convention.
50define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1_ret_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
51                                                            <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
52                                                            <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
53  ; CHECK-LABEL: set_tuple2_nxv8i32_elt1_ret_elt0:
54  ; CHECK-NEXT:  ret
  ; create2(%z0, %z1) -> set element 1 to %z5 -> get element 0 (untouched)
55  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
56  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
57  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
58  ret <vscale x 4 x i32> %ext
59}
60
61; Test extract of tuple passed into function
; The two-element tuple arrives as the single argument %tuple and element 1 is
; returned.  The expected output is a move of z1 into z0 followed by the return.
; NOTE(review): the expected move implies the tuple argument occupies z0-z1 --
; confirm against the calling convention.
62define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) #0 {
63  ; CHECK-LABEL: get_tuple2_nxv8i32_elt1:
64  ; CHECK-NEXT:  mov     z0.d, z1.d
65  ; CHECK-NEXT:  ret
66  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
67  ret <vscale x 4 x i32> %ext
68}
69
70;
71; Insert into three element tuples
72;
73
74; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
75; insert z5:   {     z5     , tuple3.res1, tuple3.res2 }
76; extract z5:        ^^
; Builds a three-element tuple from %z0, %z1 and %z2, overwrites element 0 with
; %z5, then reads element 0 back and returns it.  The expected output is a
; single move of z5 into z0 followed by the return.
; NOTE(review): %z3 and %z4 are unused in the body; presumably they only pad
; the argument list so that %z5 is passed in register z5 -- confirm.
77define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
78                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
79                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
80  ; CHECK-LABEL: set_tuple3_nxv12i32_elt0:
81  ; CHECK-NEXT:  mov     z0.d, z5.d
82  ; CHECK-NEXT:  ret
  ; create3(%z0, %z1, %z2) -> set element 0 to %z5 -> get element 0
83  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
84  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
85  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 0)
86  ret <vscale x 4 x i32> %ext
87}
88
89; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
90; insert z5:   { tuple3.res0,     z5     , tuple3.res2 }
91; extract z5:                     ^^
; Builds a three-element tuple from %z0, %z1 and %z2, overwrites element 1 with
; %z5, then reads element 1 back and returns it.  The expected output is a
; single move of z5 into z0 followed by the return.
; NOTE(review): %z3 and %z4 are unused in the body; presumably they only pad
; the argument list so that %z5 is passed in register z5 -- confirm.
92define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
93                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
94                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
95  ; CHECK-LABEL: set_tuple3_nxv12i32_elt1:
96  ; CHECK-NEXT:  mov     z0.d, z5.d
97  ; CHECK-NEXT:  ret
  ; create3(%z0, %z1, %z2) -> set element 1 to %z5 -> get element 1
98  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
99  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
100  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 1)
101  ret <vscale x 4 x i32> %ext
102}
103
104; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
105; insert z5:   { tuple3.res0, tuple3.res1,     z5      }
106; extract z5:                                  ^^
; Builds a three-element tuple from %z0, %z1 and %z2, overwrites element 2 with
; %z5, then reads element 2 back and returns it.  The expected output is a
; single move of z5 into z0 followed by the return.
; NOTE(review): %z3 and %z4 are unused in the body; presumably they only pad
; the argument list so that %z5 is passed in register z5 -- confirm.
107define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
108                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
109                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
110  ; CHECK-LABEL: set_tuple3_nxv12i32_elt2:
111  ; CHECK-NEXT:  mov     z0.d, z5.d
112  ; CHECK-NEXT:  ret
  ; create3(%z0, %z1, %z2) -> set element 2 to %z5 -> get element 2
113  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
114  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
115  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
116  ret <vscale x 4 x i32> %ext
117}
118
119; This test checks the elements _not_ being set aren't changed.
120
121; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
122; insert z5:   { tuple3.res0,     z5     , tuple3.res2 }
123; extract z2:                                  ^^
; Builds a three-element tuple from %z0, %z1 and %z2, overwrites element 1 with
; %z5, but returns element 2, which was created from %z2.  The expected output
; is a move of z2 into z0 followed by the return.
; NOTE(review): the expected move implies %z2 is passed in register z2 --
; confirm against the calling convention.
124define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
125                                                             <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
126                                                             <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
127  ; CHECK-LABEL: set_tuple3_nxv12i32_elt1_ret_elt2:
128  ; CHECK-NEXT:  mov     z0.d, z2.d
129  ; CHECK-NEXT:  ret
  ; create3(%z0, %z1, %z2) -> set element 1 to %z5 -> get element 2 (untouched)
130  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
131  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
132  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
133  ret <vscale x 4 x i32> %ext
134}
135
136; Test extract of tuple passed into function
; The three-element tuple arrives as the second argument, after an unused
; single-vector argument %z0, and element 2 is returned.  The expected output is
; a move of z3 into z0 followed by the return.
; NOTE(review): the expected move implies %z0 takes register z0 and the tuple
; occupies z1-z3 -- confirm against the calling convention.
137define <vscale x 4 x i32> @get_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 12 x i32> %tuple) #0 {
138  ; CHECK-LABEL: get_tuple3_nxv12i32_elt2:
139  ; CHECK-NEXT:  mov     z0.d, z3.d
140  ; CHECK-NEXT:  ret
141  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %tuple, i32 2)
142  ret <vscale x 4 x i32> %ext
143}
144
145;
146; Insert into four element tuples
147;
148
149; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
150; insert z5:   {     z5     , tuple4.res1, tuple4.res2, tuple4.res3 }
151; extract z5:        ^^
; Builds a four-element tuple from %z0-%z3, overwrites element 0 with %z5, then
; reads element 0 back and returns it.  The expected output is a single move of
; z5 into z0 followed by the return.
; NOTE(review): %z4 is unused in the body; presumably it only pads the argument
; list so that %z5 is passed in register z5 -- confirm.
152define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
153                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
154                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
155  ; CHECK-LABEL: set_tuple4_nxv16i32_elt0:
156  ; CHECK-NEXT:  mov     z0.d, z5.d
157  ; CHECK-NEXT:  ret
  ; create4(%z0, %z1, %z2, %z3) -> set element 0 to %z5 -> get element 0
158  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
159  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
160  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 0)
161  ret <vscale x 4 x i32> %ext
162}
163
164; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
165; insert z5:   { tuple4.res0,     z5     , tuple4.res2, tuple4.res3 }
166; extract z5:                     ^^
; Builds a four-element tuple from %z0-%z3, overwrites element 1 with %z5, then
; reads element 1 back and returns it.  The expected output is a single move of
; z5 into z0 followed by the return.
; NOTE(review): %z4 is unused in the body; presumably it only pads the argument
; list so that %z5 is passed in register z5 -- confirm.
167define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
168                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
169                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
170  ; CHECK-LABEL: set_tuple4_nxv16i32_elt1:
171  ; CHECK-NEXT:  mov     z0.d, z5.d
172  ; CHECK-NEXT:  ret
  ; create4(%z0, %z1, %z2, %z3) -> set element 1 to %z5 -> get element 1
173  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
174  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
175  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 1)
176  ret <vscale x 4 x i32> %ext
177}
178
179; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
180; insert z5:   { tuple4.res0, tuple4.res1,     z5     , tuple4.res3 }
181; extract z5:                                  ^^
; Builds a four-element tuple from %z0-%z3, overwrites element 2 with %z5, then
; reads element 2 back and returns it.  The expected output is a single move of
; z5 into z0 followed by the return.
; NOTE(review): %z4 is unused in the body; presumably it only pads the argument
; list so that %z5 is passed in register z5 -- confirm.
182define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
183                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
184                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
185  ; CHECK-LABEL: set_tuple4_nxv16i32_elt2:
186  ; CHECK-NEXT:  mov     z0.d, z5.d
187  ; CHECK-NEXT:  ret
  ; create4(%z0, %z1, %z2, %z3) -> set element 2 to %z5 -> get element 2
188  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
189  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
190  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
191  ret <vscale x 4 x i32> %ext
192}
193
194; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
195; insert z5:   { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
196; extract z5:                                               ^^
; Builds a four-element tuple from %z0-%z3, overwrites element 3 with %z5, then
; reads element 3 back and returns it.  The expected output is a single move of
; z5 into z0 followed by the return.
; NOTE(review): %z4 is unused in the body; presumably it only pads the argument
; list so that %z5 is passed in register z5 -- confirm.
197define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
198                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
199                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
200  ; CHECK-LABEL: set_tuple4_nxv16i32_elt3:
201  ; CHECK-NEXT:  mov     z0.d, z5.d
202  ; CHECK-NEXT:  ret
  ; create4(%z0, %z1, %z2, %z3) -> set element 3 to %z5 -> get element 3
203  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
204  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
205  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 3)
206  ret <vscale x 4 x i32> %ext
207}
208
209; This test checks the elements _not_ being set aren't changed.
210
211; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
212; insert z5:   { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
213; extract z2:                                  ^^
; Builds a four-element tuple from %z0-%z3, overwrites element 3 with %z5, but
; returns element 2, which was created from %z2.  The expected output is a move
; of z2 into z0 followed by the return.
; NOTE(review): the expected move implies %z2 is passed in register z2 --
; confirm against the calling convention.
214define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
215                                                             <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
216                                                             <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
217  ; CHECK-LABEL: set_tuple4_nxv16i32_elt3_ret_elt2:
218  ; CHECK-NEXT:  mov     z0.d, z2.d
219  ; CHECK-NEXT:  ret
  ; create4(%z0, %z1, %z2, %z3) -> set element 3 to %z5 -> get element 2 (untouched)
220  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
221  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
222  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
223  ret <vscale x 4 x i32> %ext
224}
225
226; Test extract of tuple passed into function
; The four-element tuple arrives as the single argument %tuple and element 3 is
; returned.  The expected output is a move of z3 into z0 followed by the return.
; NOTE(review): the expected move implies the tuple argument occupies z0-z3 --
; confirm against the calling convention.
227define <vscale x 4 x i32> @get_tuple4_nxv16i32_elt3(<vscale x 16 x i32> %tuple) #0 {
228  ; CHECK-LABEL: get_tuple4_nxv16i32_elt3:
229  ; CHECK-NEXT:  mov     z0.d, z3.d
230  ; CHECK-NEXT:  ret
231  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %tuple, i32 3)
232  ret <vscale x 4 x i32> %ext
233}
234
235attributes #0 = { nounwind "target-features"="+sve" }
236
237declare <vscale x 8 x i32>  @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
238declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
239declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32)
240
241declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
242declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32>, i32, <vscale x 4 x i32>)
243declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32>, i32)
244
245declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
246declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32>, i32, <vscale x 4 x i32>)
247declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32>, i32)
248