; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s

define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
; GFX6-LABEL: v_bfe_i32_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_bfe_u32 v0, v0, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
  %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
  ret i32 %bfe_i32
}

define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_and_b32 s1, s1, 63
; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
; GFX6-NEXT:    s_or_b32 s1, s1, s2
; GFX6-NEXT:    s_bfe_u32 s0, s0, s1
; GFX6-NEXT:    ; return to shader part epilog
  %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
  ret i32 %bfe_i32
}

; TODO: Need to expand this.
; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) #0 {
;   %bfe_i64 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2)
;   ret i64 %bfe_i64
; }

define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
; GFX6-LABEL: s_bfe_i64_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_and_b32 s2, s2, 63
; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
; GFX6-NEXT:    s_or_b32 s2, s2, s3
; GFX6-NEXT:    s_bfe_u64 s[0:1], s[0:1], s2
; GFX6-NEXT:    ; return to shader part epilog
  %bfe_i32 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2)
  ret i64 %bfe_i32
}

define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
; GFX6-LABEL: bfe_u32_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s1, s0, 63
; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
; GFX6-NEXT:    s_or_b32 s0, s1, s0
; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
; GFX6-LABEL: bfe_u32_arg_arg_imm:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s0, s0, 63
; GFX6-NEXT:    s_or_b32 s0, s0, 0x7b0000
; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
; GFX6-LABEL: bfe_u32_arg_imm_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
; GFX6-NEXT:    s_or_b32 s0, 59, s0
; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
; GFX6-LABEL: bfe_u32_imm_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s1, s2, 63
; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
; GFX6-NEXT:    s_or_b32 s0, s1, s0
; GFX6-NEXT:    s_bfe_u32 s0, 0x7b, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
; GFX6-LABEL: bfe_u32_arg_0_width_reg_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s0, s0, 63
; GFX6-NEXT:    s_bfe_u32 s0, s2, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
; GFX6-LABEL: bfe_u32_arg_0_width_imm_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s0, s0, 8
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_zextload_i8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 8
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FIXME: Should be using s_add_i32
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s0, s0, 1
; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s0, s0, 1
; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s0, s0, 1
; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80001
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s0, s0, 1
; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80003
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s0, s0, 1
; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80007
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s0, s0, 1
; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80008
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_3:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_4:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
; GFX6-NEXT:    s_lshr_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_5:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x10000
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_6:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1f0001
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_7:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1f0000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_9:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_10:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1f0001
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_11:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x180008
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_12:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80018
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_13:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_ashr_i32 s0, s0, 31
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_u32_test_14:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshr_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

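; Note on the constant-fold tests below: the call is still selected to
; s_bfe_u32 with literal inputs rather than being folded to a constant. The
; second s_bfe operand is the packed field descriptor, with the offset in
; bits [5:0] and the width in bits [22:16]; e.g. 0x80006 selects an 8-bit
; field starting at bit 6.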
define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_0:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_u32 s2, 0, 0
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_u32 s2, 0x302e, 0
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_u32 s2, 0, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_3:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_u32 s2, 1, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_4:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_u32 s2, -1, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_5:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x10007
; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_6:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x80000
; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_7:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x80000
; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x80006
; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_9:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x80010
; GFX6-NEXT:    s_bfe_u32 s2, 0x10000, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_10:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x100010
; GFX6-NEXT:    s_bfe_u32 s2, 0xffff, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_11:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x40004
; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_12:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_13:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x100010
; GFX6-NEXT:    s_bfe_u32 s2, 0x1fffe, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_14:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x1e0002
; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_15:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x1c0004
; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_u32 s2, -1, 0x70001
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_17:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x1f0001
; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_u32_constant_fold_test_18:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure that SimplifyDemandedBits doesn't cause the and to be
; reduced to the bits demanded by the bfe.

; XXX: The operand to v_bfe_u32 could also just directly be the load register.
define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
; GFX6-LABEL: simplify_bfe_u32_multi_use_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s0, s0, 63
; GFX6-NEXT:    s_bfe_u32 s1, s0, 0x20002
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v1, off, s[4:7], 0
; GFX6-NEXT:    buffer_store_dword v0, off, s[8:11], 0
; GFX6-NEXT:    s_endpgm
                                                          i32 addrspace(1)* %out1,
                                                          i32 addrspace(1)* %in) #0 {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
  store i32 %and, i32 addrspace(1)* %out1, align 4
  ret void
}

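; The shift/and and shift/shift patterns below are bfe-shaped, but as the
; checks show they are currently selected as separate shift and mask
; instructions; no s_bfe_u32 is formed for them.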
define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
; GFX6-LABEL: lshr_and:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshr_b32 s0, s0, 6
; GFX6-NEXT:    s_and_b32 s0, s0, 7
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %b = lshr i32 %a, 6
  %c = and i32 %b, 7
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
; GFX6-LABEL: v_lshr_and:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshr_b32 s0, s2, s0
; GFX6-NEXT:    s_and_b32 s0, s0, 7
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %c = lshr i32 %a, %b
  %d = and i32 %c, 7
  store i32 %d, i32 addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
; GFX6-LABEL: and_lshr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s0, s0, 0x1c0
; GFX6-NEXT:    s_lshr_b32 s0, s0, 6
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %b = and i32 %a, 448
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
; GFX6-LABEL: and_lshr2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s0, s0, 0x1ff
; GFX6-NEXT:    s_lshr_b32 s0, s0, 6
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %b = and i32 %a, 511
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
; GFX6-LABEL: shl_lshr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 9
; GFX6-NEXT:    s_lshr_b32 s0, s0, 11
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %b = shl i32 %a, 9
  %c = lshr i32 %b, 11
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }