; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI -check-prefix=ALIGNED %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-buffer-access -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI -check-prefix=UNALIGNED %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI -check-prefix=ALIGNED %s
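
; Checks how unaligned and under-aligned loads and stores are lowered for the
; local (LDS), global, and constant address spaces. The ALIGNED prefix covers
; the runs without unaligned buffer support, where accesses are split into
; naturally aligned byte/short operations; the UNALIGNED prefix covers the
; bonaire run with +unaligned-buffer-access, where wide buffer accesses are
; kept as-is.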

; SI-LABEL: {{^}}local_unaligned_load_store_i16:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(3)* %r) #0 {
  %v = load i16, i16 addrspace(3)* %p, align 1
  store i16 %v, i16 addrspace(3)* %r, align 1
  ret void
}

; SI-LABEL: {{^}}global_unaligned_load_store_i16:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte

; UNALIGNED: buffer_load_ushort
; UNALIGNED: buffer_store_short
; SI: s_endpgm
define void @global_unaligned_load_store_i16(i16 addrspace(1)* %p, i16 addrspace(1)* %r) #0 {
  %v = load i16, i16 addrspace(1)* %p, align 1
  store i16 %v, i16 addrspace(1)* %r, align 1
  ret void
}

; FUNC-LABEL: {{^}}local_unaligned_load_store_i32:

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI-NOT: v_or
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
  %v = load i32, i32 addrspace(3)* %p, align 1
  store i32 %v, i32 addrspace(3)* %r, align 1
  ret void
}

; SI-LABEL: {{^}}global_unaligned_load_store_i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte

; UNALIGNED: buffer_load_dword
; UNALIGNED: buffer_store_dword
define void @global_unaligned_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
  %v = load i32, i32 addrspace(1)* %p, align 1
  store i32 %v, i32 addrspace(1)* %r, align 1
  ret void
}

; SI-LABEL: {{^}}global_align2_load_store_i32:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short

; UNALIGNED: buffer_load_dword
; UNALIGNED: buffer_store_dword
define void @global_align2_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
  %v = load i32, i32 addrspace(1)* %p, align 2
  store i32 %v, i32 addrspace(1)* %r, align 2
  ret void
}

; FUNC-LABEL: {{^}}local_align2_load_store_i32:
; GCN: ds_read_u16
; GCN: ds_read_u16
; GCN: ds_write_b16
; GCN: ds_write_b16
define void @local_align2_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
  %v = load i32, i32 addrspace(3)* %p, align 2
  store i32 %v, i32 addrspace(3)* %r, align 2
  ret void
}

; FUNC-LABEL: {{^}}local_unaligned_load_store_i64:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_i64(i64 addrspace(3)* %p, i64 addrspace(3)* %r) #0 {
  %v = load i64, i64 addrspace(3)* %p, align 1
  store i64 %v, i64 addrspace(3)* %r, align 1
  ret void
}

; SI-LABEL: {{^}}local_unaligned_load_store_v2i32:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_v2i32(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) #0 {
  %v = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1
  store <2 x i32> %v, <2 x i32> addrspace(3)* %r, align 1
  ret void
}

; SI-LABEL: {{^}}global_align2_load_store_i64:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_load_ushort

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_load_ushort

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short

; UNALIGNED: buffer_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
define void @global_align2_load_store_i64(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
  %v = load i64, i64 addrspace(1)* %p, align 2
  store i64 %v, i64 addrspace(1)* %r, align 2
  ret void
}

; SI-LABEL: {{^}}unaligned_load_store_i64_global:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte

; UNALIGNED: buffer_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
  %v = load i64, i64 addrspace(1)* %p, align 1
  store i64 %v, i64 addrspace(1)* %r, align 1
  ret void
}

; FUNC-LABEL: {{^}}local_unaligned_load_store_v4i32:
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8

; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8

; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8

; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: s_endpgm
define void @local_unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) #0 {
  %v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1
  store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
  ret void
}

; SI-LABEL: {{^}}global_unaligned_load_store_v4i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte

; UNALIGNED: buffer_load_dwordx4
; UNALIGNED: buffer_store_dwordx4
define void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) #0 {
  %v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
  store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
  ret void
}

; FUNC-LABEL: {{^}}local_load_i64_align_4:
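; An i64 LDS load with only 4-byte alignment cannot use ds_read_b64, which
; requires 8-byte alignment, so it is expected to be split into a single
; ds_read2_b32 of two consecutive dwords.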
; GCN: ds_read2_b32
define void @local_load_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %val = load i64, i64 addrspace(3)* %in, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}local_load_i64_align_4_with_offset:
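; The pointer is advanced by 4 x i64 = 32 bytes; ds_read2_b32 offsets are
; encoded in dwords, so the two halves are read at dword offsets 8 and 9.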
; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
define void @local_load_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4
  %val = load i64, i64 addrspace(3)* %ptr, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}local_load_i64_align_4_with_split_offset:
; Tests the case where the low offset fits in 8 bits but the high offset needs 9 bits.
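; The load is at dword index 255 (byte offset 1020); the second dword would
; need offset 256, which does not fit in the 8-bit offset field, so the base
; address is adjusted instead and the pair is encoded as offset0:0 offset1:1.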
; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
; GCN: s_endpgm
define void @local_load_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
  %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
  %val = load i64, i64 addrspace(3)* %ptri64, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}local_load_i64_align_1:
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: store_dwordx2
define void @local_load_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %val = load i64, i64 addrspace(3)* %in, align 1
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}local_store_i64_align_4:
; GCN: ds_write2_b32
define void @local_store_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
  store i64 %val, i64 addrspace(3)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}local_store_i64_align_4_with_offset:
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
; GCN: s_endpgm
define void @local_store_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
  %ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4
  store i64 0, i64 addrspace(3)* %ptr, align 4
  ret void
}

; FUNC-LABEL: {{^}}local_store_i64_align_4_with_split_offset:
; Tests the case where the low offset fits in 8 bits but the high offset needs 9 bits.
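; Same split-offset arithmetic as the corresponding load test above: the base
; address is adjusted and the two dwords are written at offset0:0 offset1:1.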
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; GCN: s_endpgm
define void @local_store_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
  %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
  %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
  store i64 0, i64 addrspace(3)* %ptri64, align 4
  ret void
}

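; addrspace(2) is the constant address space. Sufficiently aligned constant
; loads from uniform addresses can be selected as scalar s_load instructions;
; in the ALIGNED configuration, under-aligned accesses are instead broken into
; naturally aligned buffer (VMEM) loads.
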
; SI-LABEL: {{^}}constant_unaligned_load_i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; UNALIGNED: s_load_dword

; SI: buffer_store_dword
define void @constant_unaligned_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
  %v = load i32, i32 addrspace(2)* %p, align 1
  store i32 %v, i32 addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_align2_load_i32:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort

; UNALIGNED: s_load_dword
; UNALIGNED: buffer_store_dword
define void @constant_align2_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
  %v = load i32, i32 addrspace(2)* %p, align 2
  store i32 %v, i32 addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_align2_load_i64:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort

; UNALIGNED: s_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
define void @constant_align2_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
  %v = load i64, i64 addrspace(2)* %p, align 2
  store i64 %v, i64 addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_align4_load_i64:
; SI: s_load_dwordx2
; SI: buffer_store_dwordx2
define void @constant_align4_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
  %v = load i64, i64 addrspace(2)* %p, align 4
  store i64 %v, i64 addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_align4_load_v4i32:
; SI: s_load_dwordx4
; SI: buffer_store_dwordx4
define void @constant_align4_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
  %v = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 4
  store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_unaligned_load_v2i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; UNALIGNED: buffer_load_dwordx2

; SI: buffer_store_dwordx2
define void @constant_unaligned_load_v2i32(<2 x i32> addrspace(2)* %p, <2 x i32> addrspace(1)* %r) #0 {
  %v = load <2 x i32>, <2 x i32> addrspace(2)* %p, align 1
  store <2 x i32> %v, <2 x i32> addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_unaligned_load_v4i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; UNALIGNED: buffer_load_dwordx4

; SI: buffer_store_dwordx4
define void @constant_unaligned_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
  %v = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 1
  store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_align4_load_i8:
; SI: buffer_load_ubyte
; SI: buffer_store_byte
define void @constant_align4_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
  %v = load i8, i8 addrspace(2)* %p, align 4
  store i8 %v, i8 addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_align2_load_i8:
; SI: buffer_load_ubyte
; SI: buffer_store_byte
define void @constant_align2_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
  %v = load i8, i8 addrspace(2)* %p, align 2
  store i8 %v, i8 addrspace(1)* %r, align 2
  ret void
}

; SI-LABEL: {{^}}constant_align4_merge_load_2_i32:
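; The two adjacent dword loads from the constant address space should be
; merged into a single s_load_dwordx2, and the result copied into VGPRs for
; the buffer store.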
; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[LO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @constant_align4_merge_load_2_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
  %gep0 = getelementptr i32, i32 addrspace(2)* %p, i64 1
  %v0 = load i32, i32 addrspace(2)* %p, align 4
  %v1 = load i32, i32 addrspace(2)* %gep0, align 4

  %gep1 = getelementptr i32, i32 addrspace(1)* %r, i64 1
  store i32 %v0, i32 addrspace(1)* %r, align 4
  store i32 %v1, i32 addrspace(1)* %gep1, align 4
  ret void
}

attributes #0 = { nounwind }