; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s

; GlobalISel selection tests for the llvm.amdgcn.sbfe bitfield-extract
; intrinsic on tahiti. The intrinsic is called as sbfe(src, offset, width).
;
; The vector form (v_bfe_i32) takes offset and width as separate operands.
; The scalar form (s_bfe_i32 / s_bfe_i64) takes a single packed control
; operand built as (width << 16) | (offset & 63); see the and/shl/or sequence
; recorded for s_bfe_i32_arg_arg_arg. For example offset 2 / width 8 shows up
; as the immediate 0x80002.

define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
; GFX6-LABEL: v_bfe_i32_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_bfe_i32 v0, v0, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src2)
  ret i32 %bfe_i32
}

define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_and_b32 s1, s1, 63
; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
; GFX6-NEXT:    s_or_b32 s1, s1, s2
; GFX6-NEXT:    s_bfe_i32 s0, s0, s1
; GFX6-NEXT:    ; return to shader part epilog
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src2)
  ret i32 %bfe_i32
}

; TODO: Need to expand this
; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) #0 {
;   %bfe_i64 = call i64 @llvm.amdgcn.sbfe.i64(i64 %src0, i32 %src1, i32 %src2)
;   ret i64 %bfe_i64
; }

define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
; GFX6-LABEL: s_bfe_i64_arg_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_and_b32 s2, s2, 63
; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
; GFX6-NEXT:    s_or_b32 s2, s2, s3
; GFX6-NEXT:    s_bfe_i64 s[0:1], s[0:1], s2
; GFX6-NEXT:    ; return to shader part epilog
  %bfe_i64 = call i64 @llvm.amdgcn.sbfe.i64(i64 %src0, i32 %src1, i32 %src2)
  ret i64 %bfe_i64
}

; One operand at a time replaced by the immediate 123. As an offset it is
; masked to 59 (123 & 63); as a width it lands in the upper half (0x7b0000).

define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
; GFX6-LABEL: bfe_i32_arg_arg_imm:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s0, s0, 63
; GFX6-NEXT:    s_or_b32 s0, s0, 0x7b0000
; GFX6-NEXT:    s_bfe_i32 s0, s2, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 123)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
; GFX6-LABEL: bfe_i32_arg_imm_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
; GFX6-NEXT:    s_or_b32 s0, 59, s0
; GFX6-NEXT:    s_bfe_i32 s0, s2, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 123, i32 %src2)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
; GFX6-LABEL: bfe_i32_imm_arg_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s1, s2, 63
; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
; GFX6-NEXT:    s_or_b32 s0, s1, s0
; GFX6-NEXT:    s_bfe_i32 s0, 0x7b, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 123, i32 %src1, i32 %src2)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) #0 {
; GFX6-LABEL: v_bfe_print_arg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x80002
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %src0, align 4
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 2, i32 8)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; Zero-width extracts: the BFE is still emitted, with an empty width field.

define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
; GFX6-LABEL: bfe_i32_arg_0_width_reg_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_and_b32 s0, s0, 63
; GFX6-NEXT:    s_bfe_i32 s0, s2, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 0)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
; GFX6-LABEL: bfe_i32_arg_0_width_imm_offset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s0, s0, 8
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 8, i32 0)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; Extracts of a loaded value, optionally pre-shifted, with constant
; offset/width pairs.

define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_i32_test_6:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x1f0001
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_i32_test_7:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x1f0000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_i32_test_8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshl_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_i32_test_9:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_i32_test_10:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x1f0001
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_i32_test_11:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x180008
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_i32_test_12:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x80018
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_i32_test_13:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_ashr_i32 s0, s0, 31
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %ashr = ashr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %ashr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_i32_test_14:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_lshr_b32 s0, s0, 31
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x1001f
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %lshr = lshr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %lshr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; All-constant operands.
; FIXME: none of the constant_fold cases below is folded in the recorded
; output; each still emits an s_bfe_i32 on immediate inputs.

define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_0:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_i32 s2, 0, 0
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 0)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_i32 s2, 0x302e, 0
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 12334, i32 0, i32 0)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_i32 s2, 0, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_3:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_i32 s2, 1, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 1, i32 0, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_4:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_i32 s2, -1, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 0, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_5:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x10007
; GFX6-NEXT:    s_bfe_i32 s2, 0x80, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 7, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_6:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x80000
; GFX6-NEXT:    s_bfe_i32 s2, 0x80, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_7:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x80000
; GFX6-NEXT:    s_bfe_i32 s2, 0x7f, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x80006
; GFX6-NEXT:    s_bfe_i32 s2, 0x7f, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_9:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x80010
; GFX6-NEXT:    s_bfe_i32 s2, 0x10000, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65536, i32 16, i32 8)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_10:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x100010
; GFX6-NEXT:    s_bfe_i32 s2, 0xffff, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65535, i32 16, i32 16)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_11:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x40004
; GFX6-NEXT:    s_bfe_i32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 4)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_12:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
; GFX6-NEXT:    s_bfe_i32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 31, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_13:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x100010
; GFX6-NEXT:    s_bfe_i32 s2, 0x1fffe, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 131070, i32 16, i32 16)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_14:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x1e0002
; GFX6-NEXT:    s_bfe_i32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 2, i32 30)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_15:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x1c0004
; GFX6-NEXT:    s_bfe_i32 s2, 0xa0, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 28)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_bfe_i32 s2, -1, 0x70001
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 1, i32 7)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_17:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x1f0001
; GFX6-NEXT:    s_bfe_i32 s2, 0xff, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 1, i32 31)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
; GFX6-LABEL: bfe_i32_constant_fold_test_18:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
; GFX6-NEXT:    s_bfe_i32 s2, 0xff, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 31, i32 1)
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}

; BFE combined with shl/ashr sign-extension idioms, division, and other BFEs.

define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: bfe_sext_in_reg_i24:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x180000
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x180000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 0, i32 24)
  %shl = shl i32 %bfe, 8
  %ashr = ashr i32 %shl, 8
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: simplify_demanded_bfe_sdiv:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v0, 2
; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    v_mul_lo_u32 v1, -2, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s2, s2, 0x100001
; GFX6-NEXT:    s_ashr_i32 s3, s2, 31
; GFX6-NEXT:    v_mul_hi_u32 v1, v0, v1
; GFX6-NEXT:    s_add_i32 s2, s2, s3
; GFX6-NEXT:    s_xor_b32 s2, s2, s3
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, s2, v1
; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 2, v1
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT:    v_subrev_i32_e64 v2, s[0:1], 2, v1
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 2, v1
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT:    v_xor_b32_e32 v0, s3, v0
; GFX6-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %src = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16)
  %div = sdiv i32 %bfe, 2
  store i32 %div, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
; GFX6-LABEL: bfe_0_width:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s0, s0, 8
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 8, i32 0)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
; GFX6-LABEL: bfe_8_bfe_8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_mov_b32 s1, 0x80000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s0, s0, s1
; GFX6-NEXT:    s_bfe_i32 s0, s0, s1
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
  %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
; GFX6-LABEL: bfe_8_bfe_16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x80000
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x100000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
  %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 16)
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; FIXME: This really should be folded into a single BFE; the recorded output
; still contains two.
define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
; GFX6-LABEL: bfe_16_bfe_8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x100000
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x80000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 16)
  %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure there isn't a redundant BFE.
; FIXME: the recorded output still has an s_bfe_i32 followed by an
; s_sext_i32_i8 on the same value.
define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s2, s2, s0
; GFX6-NEXT:    s_bfe_i32 s0, s2, 0x80000
; GFX6-NEXT:    s_sext_i32_i8 s0, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 0, i32 8)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; Same shape as above, but with offset 8 / width 0 passed to the intrinsic.
define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe_wrong:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xb
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xc
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_add_i32 s2, s2, s0
; GFX6-NEXT:    s_bfe_i32 s0, s2, 8
; GFX6-NEXT:    s_sext_i32_i8 s0, s0
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 8, i32 0)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
; GFX6-LABEL: sextload_i8_to_i32_bfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_load_sbyte v0, off, s[0:3], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 0, i32 8)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
; GFX6-LABEL: sextload_i8_to_i32_bfe_0:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_load_sbyte v0, off, s[0:3], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_bfe_i32 v0, v0, 8, 0
; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 8, i32 0)
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: sext_in_reg_i1_bfe_offset_0:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x10000
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x10000
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; GFX6-LABEL: sext_in_reg_i1_bfe_offset_1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT:    s_mov_b32 s6, -1
; GFX6-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x20000
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x10001
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT:    s_endpgm
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void
@sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { 962; GFX6-LABEL: sext_in_reg_i2_bfe_offset_1: 963; GFX6: ; %bb.0: 964; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 965; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 966; GFX6-NEXT: s_mov_b32 s6, -1 967; GFX6-NEXT: s_mov_b32 s7, 0xf000 968; GFX6-NEXT: s_waitcnt lgkmcnt(0) 969; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 970; GFX6-NEXT: s_waitcnt lgkmcnt(0) 971; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000 972; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20001 973; GFX6-NEXT: v_mov_b32_e32 v0, s0 974; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 975; GFX6-NEXT: s_endpgm 976 %x = load i32, i32 addrspace(1)* %in, align 4 977 %shl = shl i32 %x, 30 978 %shr = ashr i32 %shl, 30 979 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 2) 980 store i32 %bfe, i32 addrspace(1)* %out, align 4 981 ret void 982} 983 984declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) #1 985declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) #1 986 987attributes #0 = { nounwind } 988attributes #1 = { nounwind readnone } 989