; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; memcpy intrinsic declarations used by the tests in this file. The operands
; are (dest, src, length, alignment, isvolatile).
;   p1i8 variant, i64 length: used by the global_to_global tests (addrspace 1).
;   p3i8 variant, i32 length: used by the lds_to_lds tests (addrspace 3).
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind

; 32-byte addrspace(3) to addrspace(3) memcpy with 1-byte alignment. The checks
; below expect 32 single-byte reads followed, in order, by 32 single-byte
; writes and then the end of the program.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
  %dst = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  %src = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  ; length = 32 bytes, alignment = 1, isvolatile = false
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 32, i32 1, i1 false) nounwind
  ret void
}

; 32-byte addrspace(3) to addrspace(3) memcpy with 2-byte alignment. The checks
; below expect 16 16-bit reads followed, in order, by 16 16-bit writes and then
; the end of the program.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align2:
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16

; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16

; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16

; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16
; SI: ds_write_b16

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
  %dst = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  %src = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  ; length = 32 bytes, alignment = 2, isvolatile = false
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 32, i32 2, i1 false) nounwind
  ret void
}

; 32-byte addrspace(3) to addrspace(3) memcpy with 4-byte alignment. The copy
; is checked as 32-bit reads and writes in any relative order (DAG checks),
; followed by the end of the program.
;
; Exactly eight read/write pairs are listed: 32 bytes / 4 bytes per b32 access.
; Listing more pairs than the copy can produce only passes when identical DAG
; directives are allowed to match the same instruction.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4:
; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; length = 32 bytes, alignment = 4, isvolatile = false
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
  ret void
}

; 32-byte addrspace(3) to addrspace(3) memcpy with 8-byte alignment. The checks
; currently expect the same 32-bit accesses as the 4-byte-aligned case, in any
; relative order (DAG checks), followed by the end of the program.
;
; Exactly eight read/write pairs are listed: 32 bytes / 4 bytes per b32 access.
; The final s_endpgm check is an ordered (non-DAG) check, as in the other tests
; in this file, so it must come after the memory operations.
; FIXME: Use 64-bit ops
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8:

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI-DAG: ds_read_b32
; SI-DAG: ds_write_b32

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  ; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  ; length = 32 bytes, alignment = 8, isvolatile = false
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
  ret void
}

; 32-byte addrspace(1) to addrspace(1) memcpy with 1-byte alignment. The checks
; below expect 32 byte loads and 32 byte stores in any relative order (DAG
; checks), followed by the end of the program.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align1:
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
  %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  ; length = 32 bytes, alignment = 1, isvolatile = false
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 1, i1 false) nounwind
  ret void
}

; 32-byte addrspace(1) to addrspace(1) memcpy with 2-byte alignment. The checks
; below expect 16 16-bit loads and 16 16-bit stores in any relative order (DAG
; checks), followed by the end of the program.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align2:
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort

; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short

; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
  %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  ; length = 32 bytes, alignment = 2, isvolatile = false
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 2, i1 false) nounwind
  ret void
}

; 32-byte addrspace(1) to addrspace(1) memcpy with 4-byte alignment. The checks
; below expect two dwordx4 loads followed, in order, by two dwordx4 stores and
; then the end of the program.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align4:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
  %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  ; length = 32 bytes, alignment = 4, isvolatile = false
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 4, i1 false) nounwind
  ret void
}

; 32-byte addrspace(1) to addrspace(1) memcpy with 8-byte alignment. The checks
; below expect two dwordx4 loads followed, in order, by two dwordx4 stores and
; then the end of the program.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align8:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
  %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  ; length = 32 bytes, alignment = 8, isvolatile = false
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 8, i1 false) nounwind
  ret void
}

; 32-byte addrspace(1) to addrspace(1) memcpy with 16-byte alignment. The
; checks below expect two dwordx4 loads followed, in order, by two dwordx4
; stores and then the end of the program.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align16:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  ; Reinterpret both i64 pointers as i8 pointers, the type the intrinsic takes.
  %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  ; length = 32 bytes, alignment = 16, isvolatile = false
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 16, i1 false) nounwind
  ret void
}
