; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Make sure 64-bit BFE pattern does a 32-bit BFE on the relevant half.
;
; Every test below loads an i64 indexed by the workitem id, applies
; "lshr" followed by "and" with a constant mask, and stores the result.
; Each test captures every FileCheck variable it later uses, so no test
; depends on a capture left over from an earlier one.

; Extract the high bit of the low half
; GCN-LABEL: {{^}}v_uextract_bit_31_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 31
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Extract the high bit of the high half
; GCN-LABEL: {{^}}v_uextract_bit_63_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 63
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Shift by 1, mask 1: a single bit in the low half.
; GCN-LABEL: {{^}}v_uextract_bit_1_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 1
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Shift by 20, mask 1: a single bit in the low half.
; GCN-LABEL: {{^}}v_uextract_bit_20_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 20
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Shift by 32, mask 1: the low bit of the high half, so only an "and" with 1
; on the high dword is expected.
; GCN-LABEL: {{^}}v_uextract_bit_32_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 32
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Shift by 33, mask 1: bit 1 of the high half.
; The stored low register must be the BFE result captured in this test.
; GCN-LABEL: {{^}}v_uextract_bit_33_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 33
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Shift by 20, mask 3: a 2-bit field in the low half.
; GCN-LABEL: {{^}}v_uextract_bit_20_21_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 20
  %bit = and i64 %srl, 3
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Shift by 1, mask 0x3fffffff (1073741823): a 30-bit field in the low half.
; GCN-LABEL: {{^}}v_uextract_bit_1_30_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 1
  %bit = and i64 %srl, 1073741823
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Shift by 1, mask 0x7fffffff (2147483647): the field reaches the top of the
; low half, so a plain 32-bit shift right by 1 is expected.
; GCN-LABEL: {{^}}v_uextract_bit_1_31_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 1
  %bit = and i64 %srl, 2147483647
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Spans the dword boundary, so requires full shift
; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 31
  %bit = and i64 %srl, 3
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Shift by 33, mask 3: a 2-bit field entirely in the high half.
; Note: despite the test name, the IR shifts by 33 (not 32), which is why the
; expected BFE offset within the high dword is 1.
; GCN-LABEL: {{^}}v_uextract_bit_32_33_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 33
  %bit = and i64 %srl, 3
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Shift by 30, mask 0x3fffffff (1073741823): the field spans the dword
; boundary, so the full 64-bit shift followed by a 32-bit "and" is expected.
; GCN-LABEL: {{^}}v_uextract_bit_30_60_i64:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 30
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 0x3fffffff, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 30
  %bit = and i64 %srl, 1073741823
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Shift by 33, mask 0x3fffffff (1073741823): a 30-bit field entirely in the
; high half. The zero register and the stored pair are captured and checked
; within this test.
; GCN-LABEL: {{^}}v_uextract_bit_33_63_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 33
  %bit = and i64 %srl, 1073741823
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Shift by 31, mask 0xffffffff (4294967295): the full 64-bit shift is
; expected, with the high result register then overwritten with zero.
; Note: the result is stored through %out directly; %out.gep is computed but
; not used by the store.
; GCN-LABEL: {{^}}v_uextract_bit_31_63_i64:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
; GCN-NEXT: v_mov_b32_e32 v[[SHRHI]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
define void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 31
  %and = and i64 %srl, 4294967295
  store i64 %and, i64 addrspace(1)* %out
  ret void
}

; trunc applied before and mask
; GCN-LABEL: {{^}}v_uextract_bit_31_i64_trunc_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN: buffer_store_dword v[[SHIFT]]
define void @v_uextract_bit_31_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 31
  %trunc = trunc i64 %srl to i32
  %bit = and i32 %trunc, 1
  store i32 %bit, i32 addrspace(1)* %out.gep
  ret void
}

; Shift by 3, truncate to i32, then mask 1: a single bit in the low half.
; GCN-LABEL: {{^}}v_uextract_bit_3_i64_trunc_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 3, 1{{$}}
; GCN: buffer_store_dword [[BFE]]
define void @v_uextract_bit_3_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 3
  %trunc = trunc i64 %srl to i32
  %bit = and i32 %trunc, 1
  store i32 %bit, i32 addrspace(1)* %out.gep
  ret void
}

; Shift by 33, truncate to i32, then mask 1: bit 1 of the high half.
; GCN-LABEL: {{^}}v_uextract_bit_33_i64_trunc_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 1, 1{{$}}
; GCN: buffer_store_dword [[BFE]]
define void @v_uextract_bit_33_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 33
  %trunc = trunc i64 %srl to i32
  %bit = and i32 %trunc, 1
  store i32 %bit, i32 addrspace(1)* %out.gep
  ret void
}

; Shift by 31, truncate to i32, then mask 3: the field spans the dword
; boundary, so the full 64-bit shift is expected before the 32-bit "and".
; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64_trunc_i32:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
; GCN-NEXT: v_and_b32_e32 v[[SHRLO]], 3, v[[SHRLO]]
; GCN-NOT: v[[SHRLO]]
; GCN: buffer_store_dword v[[SHRLO]]
define void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 31
  %trunc = trunc i64 %srl to i32
  %bit = and i32 %trunc, 3
  store i32 %bit, i32 addrspace(1)* %out.gep
  ret void
}

; Shift by 20, "and" with 4: the constant is not a run of low set bits, so a
; 32-bit shift followed by a 32-bit "and" is expected instead of a BFE.
; The result registers are captured in this test before being checked in the
; store.
; GCN-LABEL: {{^}}and_not_mask_i64:
; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
; GCN: v_mov_b32_e32 v[[SHRHI:[0-9]+]], 0{{$}}
; GCN: v_lshrrev_b32_e32 [[SHR:v[0-9]+]], 20, v[[VALLO]]
; GCN-DAG: v_and_b32_e32 v[[SHRLO:[0-9]+]], 4, [[SHR]]
; GCN-NOT: v[[SHRLO]]
; GCN-NOT: v[[SHRHI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
define void @and_not_mask_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 20
  %bit = and i64 %srl, 4
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; The instruction count is the same with/without hasOneUse, but
; keeping the 32-bit and has a smaller encoding size than the bfe.

; Both the shifted value and the masked value are stored (volatile) to %out.
; GCN-LABEL: {{^}}v_uextract_bit_27_29_multi_use_shift_i64:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 27
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_27_29_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 27
  %bit = and i64 %srl, 3
  store volatile i64 %srl, i64 addrspace(1)* %out
  store volatile i64 %bit, i64 addrspace(1)* %out
  ret void
}

; Shift by 34, mask 7, with the shifted value also stored (volatile): both
; results come from the high half only.
; GCN-LABEL: {{^}}v_uextract_bit_34_37_multi_use_shift_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 2, [[VAL]]
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 2, 3
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHR]]:[[ZERO]]{{\]}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 34
  %bit = and i64 %srl, 7
  store volatile i64 %bit, i64 addrspace(1)* %out
  store volatile i64 %bit, i64 addrspace(1)* %out
  ret void
}

; Shift by 33, mask 7, plus a second use that takes the upper 32 bits of the
; shifted value; that second value is expected to be stored from the zero
; register.
; GCN-LABEL: {{^}}v_uextract_bit_33_36_use_upper_half_shift_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 3
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
; GCN: buffer_store_dword v[[ZERO]]
define void @v_uextract_bit_33_36_use_upper_half_shift_i64(i64 addrspace(1)* %out0, i32 addrspace(1)* %out1, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out0.gep = getelementptr i64, i64 addrspace(1)* %out0, i32 %id.x
  %out1.gep = getelementptr i32, i32 addrspace(1)* %out1, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 33
  %bit = and i64 %srl, 7
  store volatile i64 %bit, i64 addrspace(1)* %out0.gep

  %srl.srl32 = lshr i64 %srl, 32
  %srl.hi = trunc i64 %srl.srl32 to i32
  store volatile i32 %srl.hi, i32 addrspace(1)* %out1.gep
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }