; Codegen test for the llvm.amdgcn.ds.bpermute intrinsic, compiled for the
; amdgcn--amdhsa triple with -mcpu=fiji. Every function below feeds the
; intrinsic a differently-shaped index operand and stores the result to %out;
; the FileCheck lines pin the ds_bpermute_b32 instruction (and, where a
; constant is involved, its offset field) that llc is expected to emit.
;
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s

declare i32 @llvm.amdgcn.ds.bpermute(i32, i32) #0

; Baseline: %index and %src are passed to the intrinsic unmodified. The
; expected instruction has three VGPR operands.
; CHECK-LABEL: {{^}}ds_bpermute:
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @ds_bpermute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind {
  %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) #0
  store i32 %bpermute, i32 addrspace(1)* %out, align 4
  ret void
}

; The index is %base_index + 4. The expected output carries the constant 4 in
; the instruction's offset field.
; CHECK-LABEL: {{^}}ds_bpermute_imm_offset:
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
define amdgpu_kernel void @ds_bpermute_imm_offset(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {
  %index = add i32 %base_index, 4
  %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) #0
  store i32 %bpermute, i32 addrspace(1)* %out, align 4
  ret void
}

; The index is the constant 64 (%base_index is unused in this function). The
; expected output carries the whole constant in the offset field.
; CHECK-LABEL: {{^}}ds_bpermute_imm_index:
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:64
define amdgpu_kernel void @ds_bpermute_imm_index(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {
  %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 64, i32 %src) #0
  store i32 %bpermute, i32 addrspace(1)* %out, align 4
  ret void
}

; The index is (%base_index + 1) << 2, which equals (%base_index << 2) + 4, so
; the expected offset is 4. Unlike the functions above, this one is not
; declared amdgpu_kernel. An s_waitcnt on lgkmcnt is expected after the
; permute.
; CHECK-LABEL: {{^}}ds_bpermute_add_shl:
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
; CHECK: s_waitcnt lgkmcnt
define void @ds_bpermute_add_shl(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {
  %index = add i32 %base_index, 1
  %byte_index = shl i32 %index, 2
  %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %byte_index, i32 %src) #0
  store i32 %bpermute, i32 addrspace(1)* %out, align 4
  ret void
}

; Same shape as ds_bpermute_add_shl, but the +1 is written as an `or`. The
; mask 62 (0b111110) clears bit 0 first, so `or 1` cannot carry and is
; arithmetically the same as `add 1`; the expected offset is again 4.
; CHECK-LABEL: {{^}}ds_bpermute_or_shl:
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
; CHECK: s_waitcnt lgkmcnt
define void @ds_bpermute_or_shl(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {
  %masked = and i32 %base_index, 62
  %index = or i32 %masked, 1
  %byte_index = shl i32 %index, 2
  %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %byte_index, i32 %src) #0
  store i32 %bpermute, i32 addrspace(1)* %out, align 4
  ret void
}

attributes #0 = { nounwind readnone convergent }