; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Check that (trunc (lshr i64 x, (and amt, 31))) is selected as a single
; v_alignbit_b32 rather than a 64-bit shift.

; GCN-LABEL: {{^}}alignbit_shr_pat:
; GCN-DAG: s_load_dword s[[SHR:[0-9]+]]
; GCN-DAG: load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], s[[SHR]]

define amdgpu_kernel void @alignbit_shr_pat(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
bb:
  %tmp = load i64, i64 addrspace(1)* %arg, align 8
  %tmp3 = and i32 %arg2, 31
  %tmp4 = zext i32 %tmp3 to i64
  %tmp5 = lshr i64 %tmp, %tmp4
  %tmp6 = trunc i64 %tmp5 to i32
  store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
  ret void
}

; GCN-LABEL: {{^}}alignbit_shr_pat_v:
; GCN-DAG: load_dword v[[SHR:[0-9]+]],
; GCN-DAG: load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], v[[SHR]]

define amdgpu_kernel void @alignbit_shr_pat_v(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1) {
bb:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep1 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %tid
  %tmp = load i64, i64 addrspace(1)* %gep1, align 8
  %gep2 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i32 %tid
  %amt = load i32, i32 addrspace(1)* %gep2, align 4
  %tmp3 = and i32 %amt, 31
  %tmp4 = zext i32 %tmp3 to i64
  %tmp5 = lshr i64 %tmp, %tmp4
  %tmp6 = trunc i64 %tmp5 to i32
  store i32 %tmp6, i32 addrspace(1)* %gep2, align 4
  ret void
}

; GCN-LABEL: {{^}}alignbit_shr_pat_wrong_and30:
; Negative test, wrong constant
; GCN: v_lshr_b64
; GCN-NOT: v_alignbit_b32

define amdgpu_kernel void @alignbit_shr_pat_wrong_and30(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
bb:
  %tmp = load i64, i64 addrspace(1)* %arg, align 8
  %tmp3 = and i32 %arg2, 30
  %tmp4 = zext i32 %tmp3 to i64
  %tmp5 = lshr i64 %tmp, %tmp4
  %tmp6 = trunc i64 %tmp5 to i32
  store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
  ret void
}

; GCN-LABEL: {{^}}alignbit_shr_pat_wrong_and63:
; Negative test, wrong constant
; GCN: v_lshr_b64
; GCN-NOT: v_alignbit_b32

define amdgpu_kernel void @alignbit_shr_pat_wrong_and63(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
bb:
  %tmp = load i64, i64 addrspace(1)* %arg, align 8
  %tmp3 = and i32 %arg2, 63
  %tmp4 = zext i32 %tmp3 to i64
  %tmp5 = lshr i64 %tmp, %tmp4
  %tmp6 = trunc i64 %tmp5 to i32
  store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
  ret void
}

; GCN-LABEL: {{^}}alignbit_shr_pat_const30:
; GCN: load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], 30

define amdgpu_kernel void @alignbit_shr_pat_const30(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1) {
bb:
  %tmp = load i64, i64 addrspace(1)* %arg, align 8
  %tmp5 = lshr i64 %tmp, 30
  %tmp6 = trunc i64 %tmp5 to i32
  store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
  ret void
}

; GCN-LABEL: {{^}}alignbit_shr_pat_wrong_const33:
; Negative test, shift amount greater than 31
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN-NOT: v_alignbit_b32

define amdgpu_kernel void @alignbit_shr_pat_wrong_const33(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1) {
bb:
  %tmp = load i64, i64 addrspace(1)* %arg, align 8
  %tmp5 = lshr i64 %tmp, 33
  %tmp6 = trunc i64 %tmp5 to i32
  store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone speculatable }