; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

; Tests that (shl x, (sub 32, width)) >> (sub 32, width) is matched to
; v_bfe_u32/v_bfe_i32 when the shifted value has a single use, and stays as
; shl+shr when the shl has other uses or the operands are scalar (SGPR).

; GCN-LABEL: {{^}}v_ubfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Load width from the second input buffer (was %in0.gep, leaving %in1 dead).
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; The shl has a second use, so the pattern must not be folded into a bfe.
; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]

; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; VI-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Load width from the second input buffer (was %in0.gep, leaving %in1 dead).
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; Scalar operands: there is no scalar bfe with a variable width, so the
; shl/shr pair is kept.
; GCN-LABEL: {{^}}s_ubfe_sub_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[TMP:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_lshr_b32 s{{[0-9]+}}, [[TMP]], [[SUB]]
define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}s_ubfe_sub_multi_use_shl_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; Signed variant: shl followed by ashr folds to v_bfe_i32.
; GCN-LABEL: {{^}}v_sbfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Load width from the second input buffer (was %in0.gep, leaving %in1 dead).
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]

; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]

; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; VI-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]

; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  ; Load width from the second input buffer (was %in0.gep, leaving %in1 dead).
  %width = load volatile i32, i32 addrspace(1)* %in1.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}s_sbfe_sub_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[TMP:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_ashr_i32 s{{[0-9]+}}, [[TMP]], [[SUB]]
define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}s_sbfe_sub_multi_use_shl_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }