; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone

; FUNC-LABEL: @bfe_i32_arg_arg_arg
; SI: V_BFE_I32
; EG: BFE_INT
; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac
define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_arg_arg_imm
; SI: V_BFE_I32
; EG: BFE_INT
define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 123) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_arg_imm_arg
; SI: V_BFE_I32
; EG: BFE_INT
define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 123, i32 %src2) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_imm_arg_arg
; SI: V_BFE_I32
; EG: BFE_INT
define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 123, i32 %src1, i32 %src2) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @v_bfe_print_arg
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
  %load = load i32 addrspace(1)* %src0, align 4
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_arg_0_width_reg_offset
; SI-NOT: BFE
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_arg_0_width_imm_offset
; SI-NOT: BFE
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_test_6
; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: S_ENDPGM
define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_test_7
; SI-NOT: SHL
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FIXME: The shifts should be 1 BFE
; FUNC-LABEL: @bfe_i32_test_8
; SI: BUFFER_LOAD_DWORD
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: S_ENDPGM
define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_test_9
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_test_10
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_test_11
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_test_12
; SI-NOT: BFE
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_test_13
; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_test_14
; SI-NOT: LSHR
; SI-NOT: BFE
; SI: S_ENDPGM
define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_0
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_1
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_2
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_3
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_4
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_5
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_6
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0xffffff80
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_7
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_8
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_9
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_10
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_11
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -6
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_12
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_13
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_14
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_15
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_16
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_17
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: @bfe_i32_constant_fold_test_18
; SI-NOT: BFE
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
; SI: BUFFER_STORE_DWORD [[VREG]],
; SI: S_ENDPGM
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; XXX - This should really be a single BFE, but the sext_inreg of the
; extended type i24 is never custom lowered.
; FUNC-LABEL: @bfe_sext_in_reg_i24
; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
; SI: V_LSHLREV_B32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
; XSI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
; XSI-NOT: SHL
; XSI-NOT: SHR
; XSI: BUFFER_STORE_DWORD [[BFE]],
define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
  %shl = shl i32 %bfe, 8
  %ashr = ashr i32 %shl, 8
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}