; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s

; Check that the annotate-kernel-features pass adds the corresponding
; "amdgpu-*" attributes to functions that use the workgroup/workitem ID
; intrinsics, the dispatch or queue pointer intrinsics, or addrspacecasts
; that require the queue pointer.

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0

; HSA: define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  %val3 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val4 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val5 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  store volatile i32 %val3, i32 addrspace(1)* %ptr
  store volatile i32 %val4, i32 addrspace(1)* %ptr
  store volatile i32 %val5, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
define void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
  %bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
  %val = load i32, i32 addrspace(2)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
define void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
  %bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
  %val = load i32, i32 addrspace(2)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
  store volatile i32 0, i32 addrspace(4)* %stof
  ret void
}

; HSA: define void @use_private_to_flat_addrspacecast(i32* %ptr) #11 {
define void @use_private_to_flat_addrspacecast(i32* %ptr) #1 {
  %stof = addrspacecast i32* %ptr to i32 addrspace(4)*
  store volatile i32 0, i32 addrspace(4)* %stof
  ret void
}

; HSA: define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

; HSA: define void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32*
  store volatile i32 0, i32* %ftos
  ret void
}

; No-op addrspacecast should not use queue ptr
; HSA: define void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
define void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)*
  store volatile i32 0, i32 addrspace(4)* %stof
  ret void
}

; HSA: define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %stof
  ret void
}

; HSA: define void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

; HSA: define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)*
  %ld = load volatile i32, i32 addrspace(2)* %ftos
  ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

; HSA: attributes #0 = { nounwind readnone }
; HSA: attributes #1 = { nounwind }
; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }