; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; GCN-LABEL: {{^}}bfe_u32_arg_arg_arg:
; GCN: v_bfe_u32
define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_arg_imm:
; GCN: v_bfe_u32
define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_imm_arg:
; GCN: v_bfe_u32
define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_imm_arg_arg:
; GCN: v_bfe_u32
define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zextload_i8:
; GCN: buffer_load_ubyte
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %load = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32
; FIXME: Should be using s_add_i32
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

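; As with the i8 case above, the mask to 16 bits already zero-extends the
; value, so the bfe of bits [0,16) is redundant and should fold away.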
; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN: bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
; GCN-NEXT: bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
; GCN-NEXT: bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_1:
; GCN: buffer_load_dword
; GCN: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

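; ((x << 31) >> 31) isolates bit 0, so bit 31 of the result is known zero
; and the bfe of bit 31 folds to the constant 0.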
; GCN-LABEL: {{^}}bfe_u32_test_4:
; GCN-NOT: lshl
; GCN-NOT: shr
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_5:
; GCN: buffer_load_dword
; GCN-NOT: lshl
; GCN-NOT: shr
; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_6:
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_7:
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_8:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_9:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_10:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

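; As in the two tests above, offset + width == 32, so the bfe reduces to a
; plain logical shift right by the offset.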
; GCN-LABEL: {{^}}bfe_u32_test_11:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_12:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_13:
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shr = ashr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_14:
; GCN-NOT: lshr
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shr = lshr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_0:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_1:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_2:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_3:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

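; Extracting bit 0 of all-ones would normally give 1; with an offset of 0 the
; DAG combine may return the source unchanged when it looks already extended,
; which is why all-ones is expected to fold to -1 here.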
; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_4:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_5:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_6:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_7:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_8:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_9:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_10:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_11:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_12:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

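; 131070 is 0x1fffe, so the 16-bit field at offset 16 holds the value 1.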
; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_13:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_14:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_15:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_16:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_17:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_18:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure that SimplifyDemandedBits doesn't cause the and to be
; reduced to the bits demanded by the bfe.

; XXX: The operand to v_bfe_u32 could also just directly be the load register.
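; Here the bfe demands only bits [2,4) of the and, but the and result is also
; stored, so the full mask of 63 must be preserved.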
; GCN-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
; GCN: buffer_load_dword [[ARG:v[0-9]+]]
; GCN: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
; GCN-DAG: buffer_store_dword [[AND]]
; GCN-DAG: buffer_store_dword [[BFE]]
; GCN: s_endpgm
define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
                                                          i32 addrspace(1)* %out1,
                                                          i32 addrspace(1)* %in) #0 {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
  store i32 %and, i32 addrspace(1)* %out1, align 4
  ret void
}

; GCN-LABEL: {{^}}lshr_and:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; GCN: buffer_store_dword
define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = lshr i32 %a, 6
  %c = and i32 %b, 7
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}v_lshr_and:
; GCN: v_bfe_u32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, 3
; GCN: buffer_store_dword
define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %c = lshr i32 %a, %b
  %d = and i32 %c, 7
  store i32 %d, i32 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}and_lshr:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; GCN: buffer_store_dword
define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = and i32 %a, 448
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}and_lshr2:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; GCN: buffer_store_dword
define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = and i32 %a, 511
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}shl_lshr:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x150002
; GCN: buffer_store_dword
define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = shl i32 %a, 9
  %c = lshr i32 %b, 11
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }