; RUN: llc -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=GCN,O2 %s
; RUN: llc -O0 -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-intrinsics < %s | FileCheck -check-prefix=OPT %s

; Checks how the known bounds of the workitem id intrinsics are used:
;  * both llc runs expect no and_b32 in the kernels below, where every mask
;    keeps all bits of the expected id range;
;  * the opt run expects -amdgpu-lower-intrinsics to attach !range metadata
;    to each workitem id call.
;
; The !N numbers in the opt checks follow the metadata numbering of the opt
; output (listed at the end of this file), not the !0/!1 nodes defined here.

; Kernel using attribute group #0 (flat work-group size "64,128"): all three
; ids are expected to get the range [0, 128), and each is masked with 127.
; GCN-LABEL: {{^}}zext_grp_size_128:
; GCN-NOT: and_b32

; OPT-LABEL: @zext_grp_size_128(
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !0
; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !0
define amdgpu_kernel void @zext_grp_size_128(i32 addrspace(1)* nocapture %arg) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 127
  store i32 %tmp1, i32 addrspace(1)* %arg, align 4
  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
  %tmp3 = and i32 %tmp2, 127
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
  %tmp6 = and i32 %tmp5, 127
  %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
  ret void
}

; Kernel with !reqd_work_group_size 32x4x1: the expected ranges are
; per dimension, [0, 32), [0, 4) and [0, 1).
; GCN-LABEL: {{^}}zext_grp_size_32x4x1:
; GCN-NOT: and_b32

; OPT-LABEL: @zext_grp_size_32x4x1(
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !2
; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !3
; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !4
define amdgpu_kernel void @zext_grp_size_32x4x1(i32 addrspace(1)* nocapture %arg) #0 !reqd_work_group_size !0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 31
  store i32 %tmp1, i32 addrspace(1)* %arg, align 4
  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
  %tmp3 = and i32 %tmp2, 3
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
  %tmp6 = and i32 %tmp5, 1
  %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
  ret void
}

; GCN-LABEL: {{^}}zext_grp_size_1x1x1:
; GCN-NOT: and_b32

; When EarlyCSE is not run this call produces a range max with 0 active bits,
; which is a special case as an AssertZext from width 0 is invalid.
; OPT-LABEL: @zext_grp_size_1x1x1(
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !4
define amdgpu_kernel void @zext_grp_size_1x1x1(i32 addrspace(1)* nocapture %arg) #0 !reqd_work_group_size !1 {
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 1
  store i32 %tmp1, i32 addrspace(1)* %arg, align 4
  ret void
}

; Kernel using attribute group #1 (flat work-group size "512,512"): the
; expected range is [0, 512) while the mask (65535) is wider than the range.
; GCN-LABEL: {{^}}zext_grp_size_512:
; GCN-NOT: and_b32

; OPT-LABEL: @zext_grp_size_512(
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !6
; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !6
; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !6
define amdgpu_kernel void @zext_grp_size_512(i32 addrspace(1)* nocapture %arg) #1 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 65535
  store i32 %tmp1, i32 addrspace(1)* %arg, align 4
  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
  %tmp3 = and i32 %tmp2, 65535
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
  %tmp6 = and i32 %tmp5, 65535
  %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
  ret void
}

; Non-kernel function (no amdgpu_kernel calling convention) using attribute
; group #0. The llc run without -O0 expects exactly one and with 0x3ff to
; remain; the opt run still expects the [0, 128) range on the call.
; GCN-LABEL: {{^}}func_test_workitem_id_x_known_max_range:
; O2-NOT: and_b32
; O2: v_and_b32_e32 v{{[0-9]+}}, 0x3ff,
; O2-NOT: and_b32

; OPT-LABEL: @func_test_workitem_id_x_known_max_range(
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
define void @func_test_workitem_id_x_known_max_range(i32 addrspace(1)* nocapture %out) #0 {
entry:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %and = and i32 %id, 1023
  store i32 %and, i32 addrspace(1)* %out, align 4
  ret void
}

; Non-kernel function using attribute group #4, which sets no
; "amdgpu-flat-work-group-size": the expected range is [0, 1024).
; GCN-LABEL: {{^}}func_test_workitem_id_x_default_range:
; O2-NOT: and_b32
; O2: v_and_b32_e32 v{{[0-9]+}}, 0x3ff,
; O2-NOT: and_b32

; OPT-LABEL: @func_test_workitem_id_x_default_range(
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !7
define void @func_test_workitem_id_x_default_range(i32 addrspace(1)* nocapture %out) #4 {
entry:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %and = and i32 %id, 1023
  store i32 %and, i32 addrspace(1)* %out, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #2

declare i32 @llvm.amdgcn.workitem.id.y() #2

declare i32 @llvm.amdgcn.workitem.id.z() #2

attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,128" }
attributes #1 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { nounwind readnone }
attributes #4 = { nounwind }

; Required work-group sizes referenced by the kernels above.
!0 = !{i32 32, i32 4, i32 1}
!1 = !{i32 1, i32 1, i32 1}

; Expected metadata in the opt output; the numbering differs from the input
; because the generated range nodes are interleaved with the nodes above.
; OPT: !0 = !{i32 0, i32 128}
; OPT: !1 = !{i32 32, i32 4, i32 1}
; OPT: !2 = !{i32 0, i32 32}
; OPT: !3 = !{i32 0, i32 4}
; OPT: !4 = !{i32 0, i32 1}
; OPT: !5 = !{i32 1, i32 1, i32 1}
; OPT: !6 = !{i32 0, i32 512}
; OPT: !7 = !{i32 0, i32 1024}