; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.AMDGPU.bfe.u32(i32, i32, i32) nounwind readnone

; FUNC-LABEL: {{^}}bfe_u32_arg_arg_arg:
; SI: v_bfe_u32
; EG: BFE_UINT
; FIXME(review): %src2 is unused and %src1 is passed twice to the intrinsic —
; presumably the third argument was meant to be %src2; confirm against upstream.
define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_arg_imm:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 123) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_imm_arg:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 123, i32 %src2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_imm_arg_arg:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 123, i32 %src1, i32 %src2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; A zero-width bitfield extract should fold away entirely.
; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; The zero-extending load already produces the low 8 bits, so the bfe is redundant.
; FUNC-LABEL: {{^}}bfe_u32_zextload_i8:
; SI: buffer_load_ubyte
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
  %load = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; The mask already implies the bfe result; expect only the and to survive.
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; A non-zero offset keeps the bfe alive.
; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
; SI: buffer_load_dword
; SI: v_add_i32
; SI: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; A 1-bit extract at offset 0 is just an and with 1.
; FUNC-LABEL: {{^}}bfe_u32_test_1:
; SI: buffer_load_dword
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI: s_endpgm
; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_2:
define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_3:
define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; (x << 31 >> 31) has bit 31 clear, so extracting bit 31 is constant 0.
; FUNC-LABEL: {{^}}bfe_u32_test_4:
; SI-NOT: lshl
; SI-NOT: shr
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_5:
; SI: buffer_load_dword
; SI-NOT: lshl
; SI-NOT: shr
; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
; SI: s_endpgm
define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_6:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: s_endpgm
define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_7:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_8:
; SI-NOT: {{[^@]}}bfe
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_13:
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_14:
; SI-NOT: lshr
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_0:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_1:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_2:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_3:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_4:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_5:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_6:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_7:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_8:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_13:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_14:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_15:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_16:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_17:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_18:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure that SimplifyDemandedBits doesn't cause the and to be
; reduced to the bits demanded by the bfe.

; XXX: The operand to v_bfe_u32 could also just directly be the load register.
; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
; SI: buffer_load_dword [[ARG:v[0-9]+]]
; SI: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
; SI: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
; SI-DAG: buffer_store_dword [[AND]]
; SI-DAG: buffer_store_dword [[BFE]]
; SI: s_endpgm
define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
                                            i32 addrspace(1)* %out1,
                                            i32 addrspace(1)* %in) nounwind {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
  store i32 %and, i32 addrspace(1)* %out1, align 4
  ret void
}

; FUNC-LABEL: {{^}}lshr_and:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; SI: buffer_store_dword
define void @lshr_and(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = lshr i32 %a, 6
  %c = and i32 %b, 7
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_lshr_and:
; SI: v_bfe_u32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, 3
; SI: buffer_store_dword
define void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = lshr i32 %a, %b
  %d = and i32 %c, 7
  store i32 %d, i32 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}and_lshr:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; SI: buffer_store_dword
define void @and_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = and i32 %a, 448
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}and_lshr2:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; SI: buffer_store_dword
define void @and_lshr2(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = and i32 %a, 511
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}shl_lshr:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x150002
; SI: buffer_store_dword
define void @shl_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = shl i32 %a, 9
  %c = lshr i32 %b, 11
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}