; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA %s
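; -mattr=-promote-alloca keeps allocas in private memory (the promote-alloca
; pass would otherwise rewrite them), so the private-to-flat casts below reach
; codegen; -verify-machineinstrs additionally runs the machine verifier.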

; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
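; A group (LDS) pointer is a 32-bit offset, so the cast builds the 64-bit flat
; address from the group aperture as the high half (loaded via the queue
; pointer in s[4:5]; the kernel argument arrives via s[6:7]) and the offset as
; the low half; comparing against the group null (-1) selects the flat null
; pointer instead.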
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 1

; HSA-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; HSA-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}

; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]

; HSA-DAG: v_cmp_ne_i32_e64 vcc, -1, [[PTR]]
; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7

; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]
define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %stof
  ret void
}

; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 1

; HSA-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; HSA-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
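; Same pattern as the group case above, except the private aperture is read
; from the dword after the group aperture (0x11 vs. 0x10).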

; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]

; HSA-DAG: v_cmp_ne_i32_e64 vcc, -1, [[PTR]]
; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7

; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]
define void @use_private_to_flat_addrspacecast(i32* %ptr) #0 {
  %stof = addrspacecast i32* %ptr to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %stof
  ret void
}

; No-op cast: a global pointer already has the same 64-bit value as its flat
; counterpart, so no conversion code (and no queue pointer) is needed.
; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)*
  store volatile i32 7, i32 addrspace(4)* %stof
  ret void
}

; No-op cast: constant pointers likewise share their 64-bit value with flat
; pointers.
; HSA-LABEL: {{^}}use_constant_to_flat_addrspacecast:
; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 {
  %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %stof
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_group_addrspacecast:
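; The reverse cast keeps only the low 32 bits of the flat pointer and selects
; the group null (-1) when the flat pointer is null, hence the 64-bit compare
; against 0 below.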
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
; HSA-DAG: v_cmp_ne_i64_e64 vcc, 0, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: ds_write_b32 [[CASTPTR]], v[[K]]
define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_private_addrspacecast:
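; Same select pattern as the group case; the resulting private offset is then
; used as the per-thread (offen) address of a scratch buffer access.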
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
; HSA-DAG: v_cmp_ne_i64_e64 vcc, 0, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
define void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32*
  store volatile i32 0, i32* %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_global_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0
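; With the dispatch and queue pointers disabled, the kernarg segment pointer
; is expected in s[4:5], which the load below pins down.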
112
113; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
114; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
115; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
116; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0
117; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
118define void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 {
119  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
120  store volatile i32 0, i32 addrspace(1)* %ftos
121  ret void
122}

; HSA-LABEL: {{^}}use_flat_to_constant_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0
define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #0 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)*
  load volatile i32, i32 addrspace(2)* %ftos
  ret void
}

; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
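; This and the following tests check constant folding of null and -1 casts:
; a zero group/private offset folds to aperture:0, the flat null pointer maps
; to -1 in group/private, and the group sentinel -1 maps to the flat null.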
; HSA: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define void @cast_0_group_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(3)* null to i32 addrspace(4)*
  store i32 7, i32 addrspace(4)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_0_flat_to_group_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define void @cast_0_flat_to_group_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(3)*
  store i32 7, i32 addrspace(3)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_neg1_group_to_flat_addrspacecast:
; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define void @cast_neg1_group_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)*
  store i32 7, i32 addrspace(4)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_neg1_flat_to_group_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define void @cast_neg1_flat_to_group_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(4)* inttoptr (i64 -1 to i32 addrspace(4)*) to i32 addrspace(3)*
  store i32 7, i32 addrspace(3)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
; HSA: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define void @cast_0_private_to_flat_addrspacecast() #0 {
  %cast = addrspacecast i32* null to i32 addrspace(4)*
  store i32 7, i32 addrspace(4)* %cast
  ret void
}

; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
define void @cast_0_flat_to_private_addrspacecast() #0 {
  %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(0)*
  store i32 7, i32* %cast
  ret void
}

; Guard against any optimizations, present or future, that would specialize
; away the generic (flat) pointer access.

; HSA-LABEL: {{^}}branch_use_flat_i32:
; HSA: flat_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}
; HSA: s_endpgm
define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
entry:
  %cmp = icmp ne i32 %c, 0
  br i1 %cmp, label %local, label %global

local:
  %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
  br label %end

global:
  %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  br label %end

end:
  %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
  store i32 %x, i32 addrspace(4)* %fptr, align 4
;  %val = load i32, i32 addrspace(4)* %fptr, align 4
;  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Check that the prologue initializes the special SGPRs pointing to scratch.
; HSA-LABEL: {{^}}store_flat_scratch:
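; Per the checks below: flat_scratch_lo is copied straight from an incoming
; SGPR, while flat_scratch_hi is a base-plus-offset sum shifted right by 8,
; i.e. a 256-byte-granular scratch address.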
; HSA-DAG: s_mov_b32 flat_scratch_lo, s9
; HSA-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11
; HSA: s_lshr_b32 flat_scratch_hi, [[ADD]], 8
; HSA: flat_store_dword
; HSA: s_barrier
; HSA: flat_load_dword
define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
  %alloca = alloca i32, i32 9, align 4
  %x = call i32 @llvm.amdgcn.workitem.id.x() #2
  %pptr = getelementptr i32, i32* %alloca, i32 %x
  %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
  store i32 %x, i32 addrspace(4)* %fptr
  ; Dummy call
  call void @llvm.amdgcn.s.barrier() #1
  %reload = load i32, i32 addrspace(4)* %fptr, align 4
  store i32 %reload, i32 addrspace(1)* %out, align 4
  ret void
}

declare void @llvm.amdgcn.s.barrier() #1
declare i32 @llvm.amdgcn.workitem.id.x() #2

attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind readnone }