; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0

declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #2
declare i1 @llvm.amdgcn.is.private(i8* nocapture) #2

; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  %val3 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val4 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val5 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  store volatile i32 %val3, i32 addrspace(1)* %ptr
  store volatile i32 %val4, i32 addrspace(1)* %ptr
  store volatile i32 %val5, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #12 {
define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #11 {
define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
  store volatile i32 0, i32 addrspace(5)* %ftos
  ret void
}

; No-op addrspacecast should not use queue ptr
; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
  %ld = load volatile i32, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_is_shared(i8* %ptr) #11 {
define amdgpu_kernel void @use_is_shared(i8* %ptr) #1 {
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %ext = zext i1 %is.shared to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

; HSA: define amdgpu_kernel void @use_is_private(i8* %ptr) #11 {
define amdgpu_kernel void @use_is_private(i8* %ptr) #1 {
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %ext = zext i1 %is.private to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

; HSA: define amdgpu_kernel void @use_alloca() #13 {
define amdgpu_kernel void @use_alloca() #1 {
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

; HSA: define amdgpu_kernel void @use_alloca_non_entry_block() #13 {
define amdgpu_kernel void @use_alloca_non_entry_block() #1 {
entry:
  br label %bb

bb:
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

; HSA: define void @use_alloca_func() #13 {
define void @use_alloca_func() #1 {
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }
attributes #2 = { nounwind readnone speculatable }

; HSA: attributes #0 = { nounwind readnone speculatable willreturn }
; HSA: attributes #1 = { nounwind }
; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }
; HSA: attributes #12 = { nounwind "amdgpu-kernarg-segment-ptr" }
; HSA: attributes #13 = { nounwind "amdgpu-stack-objects" }