; Codegen tests for loads of i32 and vectors of i32 from addrspace(3), including zero/sign extension to i64.
; Default (no -mcpu) and tonga runs share the SICIVI checks; the gfx900 and
; gfx908 runs enable the GFX9 prefix so that the m0 / accvgpr negative checks
; in this file are actually exercised.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global,-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global,-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-flat-for-global,-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s

; Runs with the ds128 feature enabled, testing ds_read_b128/ds_write_b128 selection.
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=SI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s

; Scalar case: one i32 is loaded from %in and stored to %out, both in
; addrspace(3). The checks expect a single ds_read_b32 on the amdgcn runs and
; an LDS_READ_RET on the r600 run, with no s_wqm_b64 emitted.
; FUNC-LABEL: {{^}}local_load_i32:
; GCN-NOT: s_wqm_b64
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
; GCN: ds_read_b32

; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
entry:
  %val = load i32, i32 addrspace(3)* %in
  store i32 %val, i32 addrspace(3)* %out
  ret void
}

; <2 x i32> copy through addrspace(3); the amdgcn runs are expected to use a
; single 64-bit ds_read_b64.
; FUNC-LABEL: {{^}}local_load_v2i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN: ds_read_b64
define amdgpu_kernel void @local_load_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
entry:
  %val = load <2 x i32>, <2 x i32> addrspace(3)* %in
  store <2 x i32> %val, <2 x i32> addrspace(3)* %out
  ret void
}

; <3 x i32> copy through addrspace(3). With ds128 enabled, the tahiti run is
; expected to split the load into a b64 and a b32 read (in either order),
; while the tonga/gfx900 runs are expected to use one ds_read_b96.
; FUNC-LABEL: {{^}}local_load_v3i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; SI-DAG: ds_read_b64
; SI-DAG: ds_read_b32
; CIVI-DAG: ds_read_b96
define amdgpu_kernel void @local_load_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> addrspace(3)* %in) #0 {
entry:
  %val = load <3 x i32>, <3 x i32> addrspace(3)* %in
  store <3 x i32> %val, <3 x i32> addrspace(3)* %out
  ret void
}

; <4 x i32> copy through addrspace(3) with no explicit alignment. In the runs
; with ds128 disabled the load is expected to become one ds_read2_b64 covering
; both 64-bit halves (offset1:1).
; FUNC-LABEL: {{^}}local_load_v4i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}

define amdgpu_kernel void @local_load_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
entry:
  %val = load <4 x i32>, <4 x i32> addrspace(3)* %in
  store <4 x i32> %val, <4 x i32> addrspace(3)* %out
  ret void
}

; <8 x i32> copy through addrspace(3); expected to be read with two
; ds_read2_b64 instructions (offsets 0/1 and 2/3), matched in any order.
; FUNC-LABEL: {{^}}local_load_v8i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
define amdgpu_kernel void @local_load_v8i32(<8 x i32> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
entry:
  %val = load <8 x i32>, <8 x i32> addrspace(3)* %in
  store <8 x i32> %val, <8 x i32> addrspace(3)* %out
  ret void
}

; <16 x i32> copy through addrspace(3). Both directions are checked: four
; ds_read2_b64 for the load and four ds_write2_b64 for the store, each set
; covering 64-bit offsets 0 through 7, matched in any order.
; FUNC-LABEL: {{^}}local_load_v16i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
define amdgpu_kernel void @local_load_v16i32(<16 x i32> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
entry:
  %val = load <16 x i32>, <16 x i32> addrspace(3)* %in
  store <16 x i32> %val, <16 x i32> addrspace(3)* %out
  ret void
}

; Zero-extending load: an i32 read from addrspace(3) is widened to i64 and
; stored back to addrspace(3). Only the m0 handling is checked here.
; FUNC-LABEL: {{^}}local_zextload_i32_to_i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
  %narrow = load i32, i32 addrspace(3)* %in
  %wide = zext i32 %narrow to i64
  store i64 %wide, i64 addrspace(3)* %out
  ret void
}

; Sign-extending load: an i32 read from addrspace(3) is widened to i64 and
; stored back to addrspace(3). Only the m0 handling is checked here.
; FUNC-LABEL: {{^}}local_sextload_i32_to_i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
  %narrow = load i32, i32 addrspace(3)* %in
  %wide = sext i32 %narrow to i64
  store i64 %wide, i64 addrspace(3)* %out
  ret void
}

; Single-element vector variant of the zero-extending load:
; <1 x i32> -> <1 x i64>. Only the m0 handling is checked here.
; FUNC-LABEL: {{^}}local_zextload_v1i32_to_v1i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
  %narrow = load <1 x i32>, <1 x i32> addrspace(3)* %in
  %wide = zext <1 x i32> %narrow to <1 x i64>
  store <1 x i64> %wide, <1 x i64> addrspace(3)* %out
  ret void
}

; Single-element vector variant of the sign-extending load:
; <1 x i32> -> <1 x i64>. Only the m0 handling is checked here.
; FUNC-LABEL: {{^}}local_sextload_v1i32_to_v1i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
  %narrow = load <1 x i32>, <1 x i32> addrspace(3)* %in
  %wide = sext <1 x i32> %narrow to <1 x i64>
  store <1 x i64> %wide, <1 x i64> addrspace(3)* %out
  ret void
}

; Zero-extending vector load: <2 x i32> from addrspace(3) widened to
; <2 x i64>. Only the m0 handling is checked here.
; FUNC-LABEL: {{^}}local_zextload_v2i32_to_v2i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
  %narrow = load <2 x i32>, <2 x i32> addrspace(3)* %in
  %wide = zext <2 x i32> %narrow to <2 x i64>
  store <2 x i64> %wide, <2 x i64> addrspace(3)* %out
  ret void
}

; Sign-extending vector load: <2 x i32> from addrspace(3) widened to
; <2 x i64>. Only the m0 handling is checked here.
; FUNC-LABEL: {{^}}local_sextload_v2i32_to_v2i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
  %narrow = load <2 x i32>, <2 x i32> addrspace(3)* %in
  %wide = sext <2 x i32> %narrow to <2 x i64>
  store <2 x i64> %wide, <2 x i64> addrspace(3)* %out
  ret void
}

; Zero-extending vector load: <4 x i32> from addrspace(3) widened to
; <4 x i64>. Only the m0 handling is checked here.
; FUNC-LABEL: {{^}}local_zextload_v4i32_to_v4i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
  %narrow = load <4 x i32>, <4 x i32> addrspace(3)* %in
  %wide = zext <4 x i32> %narrow to <4 x i64>
  store <4 x i64> %wide, <4 x i64> addrspace(3)* %out
  ret void
}

; Sign-extending vector load: <4 x i32> from addrspace(3) widened to
; <4 x i64>. Only the m0 handling is checked here.
; FUNC-LABEL: {{^}}local_sextload_v4i32_to_v4i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
  %narrow = load <4 x i32>, <4 x i32> addrspace(3)* %in
  %wide = sext <4 x i32> %narrow to <4 x i64>
  store <4 x i64> %wide, <4 x i64> addrspace(3)* %out
  ret void
}

; Checks whether a <4 x i32> load/store pair that is explicitly 16-byte
; aligned is selected as ds_read_b128 / ds_write_b128. The tahiti run must
; not produce either instruction; the r600 run expects four LDS_READ_RET.
; FUNC-LABEL: {{^}}local_v4i32_to_128:

; SI-NOT: ds_read_b128
; SI-NOT: ds_write_b128

; CIVI: ds_read_b128
; CIVI: ds_write_b128

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_v4i32_to_128(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) {
  %val = load <4 x i32>, <4 x i32> addrspace(3)* %in, align 16
  store <4 x i32> %val, <4 x i32> addrspace(3)* %out, align 16
  ret void
}

; Zero-extending vector load: <8 x i32> from addrspace(3) widened to
; <8 x i64>. Only the m0 handling is checked here.
; FUNC-LABEL: {{^}}local_zextload_v8i32_to_v8i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
  %narrow = load <8 x i32>, <8 x i32> addrspace(3)* %in
  %wide = zext <8 x i32> %narrow to <8 x i64>
  store <8 x i64> %wide, <8 x i64> addrspace(3)* %out
  ret void
}

; Sign-extending vector load: <8 x i32> from addrspace(3) widened to
; <8 x i64>. Only the m0 handling is checked here.
; FUNC-LABEL: {{^}}local_sextload_v8i32_to_v8i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
  %narrow = load <8 x i32>, <8 x i32> addrspace(3)* %in
  %wide = sext <8 x i32> %narrow to <8 x i64>
  store <8 x i64> %wide, <8 x i64> addrspace(3)* %out
  ret void
}

; Sign-extending vector load: <16 x i32> from addrspace(3) widened to
; <16 x i64>. Only the m0 handling is checked here.
; FUNC-LABEL: {{^}}local_sextload_v16i32_to_v16i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
  %narrow = load <16 x i32>, <16 x i32> addrspace(3)* %in
  %wide = sext <16 x i32> %narrow to <16 x i64>
  store <16 x i64> %wide, <16 x i64> addrspace(3)* %out
  ret void
}

; Zero-extending vector load: <16 x i32> from addrspace(3) widened to
; <16 x i64>. Only the m0 handling is checked here. The label pattern ends
; with the colon of the emitted symbol so it cannot match a longer name that
; merely starts with this one.
; FUNC-LABEL: {{^}}local_zextload_v16i32_to_v16i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
  %ext = zext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}

; Sign-extending vector load: <32 x i32> from addrspace(3) widened to
; <32 x i64>. Only the m0 handling is checked here.
; FUNC-LABEL: {{^}}local_sextload_v32i32_to_v32i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
  %narrow = load <32 x i32>, <32 x i32> addrspace(3)* %in
  %wide = sext <32 x i32> %narrow to <32 x i64>
  store <32 x i64> %wide, <32 x i64> addrspace(3)* %out
  ret void
}

; Zero-extending vector load: <32 x i32> from addrspace(3) widened to
; <32 x i64>. Only the m0 handling is checked here.
; FUNC-LABEL: {{^}}local_zextload_v32i32_to_v32i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
  %narrow = load <32 x i32>, <32 x i32> addrspace(3)* %in
  %wide = zext <32 x i32> %narrow to <32 x i64>
  store <32 x i64> %wide, <32 x i64> addrspace(3)* %out
  ret void
}

; <32 x i32> copy through addrspace(3). Besides the m0 handling, the checks
; require that no accvgpr instruction shows up in the output of the GFX9 runs.
; FUNC-LABEL: {{^}}local_load_v32i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
; GFX9-NOT: accvgpr

define amdgpu_kernel void @local_load_v32i32(<32 x i32> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
  %val = load <32 x i32>, <32 x i32> addrspace(3)* %in
  store <32 x i32> %val, <32 x i32> addrspace(3)* %out
  ret void
}

; Attribute group referenced as #0 by the kernels in this file.
attributes #0 = { nounwind }