; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
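
; llvm.amdgcn.sbfe.i32(src, offset, width) returns the width-bit field of
; src starting at bit 'offset', sign-extended to 32 bits; a width of 0
; yields 0.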

; GCN-LABEL: {{^}}bfe_i32_arg_arg_arg:
; GCN: v_bfe_i32
define amdgpu_kernel void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_arg_imm:
; GCN: v_bfe_i32
define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 123)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_imm_arg:
; GCN: v_bfe_i32
define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 123, i32 %src2)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_imm_arg_arg:
; GCN: v_bfe_i32
define amdgpu_kernel void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 123, i32 %src1, i32 %src2)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}v_bfe_print_arg:
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) #0 {
  %load = load i32, i32 addrspace(1)* %src0, align 4
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 2, i32 8)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_0_width_reg_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_0_width_imm_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 8, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_6:
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_7:
; GCN-NOT: shl
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_8:
; GCN: buffer_load_dword
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_9:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_10:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_11:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_12:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_13:
; GCN: v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_14:
; GCN-NOT: lshr
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
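
; The cases below should be constant folded away to a single immediate
; move. For example, sbfe(128, 0, 8) takes the low 8 bits (0x80) and
; sign-extends from bit 7, giving -128 (0xffffff80).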

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_0:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 0)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_1:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 12334, i32 0, i32 0)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_2:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_3:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 1, i32 0, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_4:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 0, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_5:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 7, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_6:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0xffffff80
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_7:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_8:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_9:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65536, i32 16, i32 8)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_10:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65535, i32 16, i32 16)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
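
; sbfe(160, 4, 4): 160 = 0xa0, so bits [7:4] are 0b1010; sign-extending the
; 4-bit value 0b1010 gives -6.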
; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_11:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -6
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 4)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_12:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 31, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_13:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 131070, i32 16, i32 16)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_14:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 2, i32 30)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_15:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 28)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_16:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 1, i32 7)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_17:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 1, i32 31)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_18:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 31, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_sext_in_reg_i24:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]],
; GCN-NOT: v_lshl
; GCN-NOT: v_ashr
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 24
; GCN: buffer_store_dword [[BFE]],
define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 0, i32 24)
  %shl = shl i32 %bfe, 8
  %ashr = ashr i32 %shl, 8
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
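
; Signed division by 2 is lowered as (x + (x >>u 31)) >>s 1: the sign bit
; is added as a rounding correction before the arithmetic shift.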
; GCN-LABEL: @simplify_demanded_bfe_sdiv
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
; GCN: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
; GCN: v_add_{{[iu]}}32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
; GCN: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; GCN: buffer_store_dword [[TMP2]]
define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16)
  %div = sdiv i32 %bfe, 2
  store i32 %div, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_0_width:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 8, i32 0)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_8_bfe_8:
; GCN: v_bfe_i32
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
  %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_8_bfe_16:
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; GCN: s_endpgm
define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
  %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 16)
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; This really should be folded into 1 BFE
; GCN-LABEL: {{^}}bfe_16_bfe_8:
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 16)
  %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}
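
; The shl/ashr-by-24 pair below is a sign_extend_inreg from i8; combined
; with the 8-bit signed BFE at offset 0 it should reduce to a single
; s_sext_i32_i8.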
; Make sure there isn't a redundant BFE
; GCN-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe:
; GCN: s_sext_i32_i8 s{{[0-9]+}}, s{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 0, i32 8)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 8, i32 0)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}sextload_i8_to_i32_bfe:
; GCN: buffer_load_sbyte
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 0, i32 8)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN: .text
; GCN-LABEL: {{^}}sextload_i8_to_i32_bfe_0:{{.*$}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 8, i32 0)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}sext_in_reg_i1_bfe_offset_0:
; GCN-NOT: shr
; GCN-NOT: shl
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; GCN: s_endpgm
define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}sext_in_reg_i1_bfe_offset_1:
; GCN: buffer_load_dword
; GCN-NOT: shl
; GCN-NOT: shr
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
; GCN: s_endpgm
define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
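
; Both the inner sign_extend_inreg (the shl/ashr by 30) and the extract
; survive as BFEs here; the outer BFE reads bit 2, which lies outside the
; original 2-bit field.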
; GCN-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
; GCN: buffer_load_dword
; GCN-NOT: v_lshl
; GCN-NOT: v_ashr
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 2
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
; GCN: s_endpgm
define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 2)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }