; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=GFX803 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX1010 %s

; Checks the code emitted at the start of amdgpu_kernel functions on gfx803,
; gfx900 and gfx1010 for every combination of "has a private stack object",
; "makes a call" and "frame-pointer"="all". The assertions cover the flat
; scratch setup, the s[0:3] offset adjustment, and the initialisation of s32
; (only present when the kernel calls @ex) and s33 (only present under
; "frame-pointer"="all"). The last kernel covers a spill whose offset has to
; be carried in an SGPR.

; No stack objects and no calls: every target emits only s_endpgm.
define amdgpu_kernel void @test_kern_empty() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_empty:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_kern_empty:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_kern_empty:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_endpgm
entry:
  ret void
}

; One volatile store to a private (addrspace(5)) alloca and no call: flat
; scratch and s[0:3] are set up, s32 and s33 are not written.
define amdgpu_kernel void @test_kern_stack() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_stack:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s4, s4, s7
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    v_mov_b32_e32 v0, 0
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s5
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_kern_stack:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_kern_stack:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s4, s4, s7
; GFX1010-NEXT:    s_addc_u32 s5, s5, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX1010-NEXT:    s_endpgm
entry:
  %x = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %x, align 4
  ret void
}

; A call to @ex with no stack objects: s32 is set to 0 before the call.
define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_call:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s4, s4, s7
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s5
; GFX803-NEXT:    s_getpc_b64 s[4:5]
; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX803-NEXT:    s_mov_b32 s32, 0
; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_kern_call:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    s_getpc_b64 s[4:5]
; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX900-NEXT:    s_mov_b32 s32, 0
; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_kern_call:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s4, s4, s7
; GFX1010-NEXT:    s_mov_b32 s32, 0
; GFX1010-NEXT:    s_addc_u32 s5, s5, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    s_getpc_b64 s[4:5]
; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX1010-NEXT:    s_endpgm
entry:
  tail call void @ex() #0
  ret void
}

; Stack object plus call: s32 is set to 0x400 (gfx803, gfx900) or 0x200
; (gfx1010) before the call.
define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_stack_and_call:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s4, s4, s7
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    v_mov_b32_e32 v0, 0
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s5
; GFX803-NEXT:    s_getpc_b64 s[4:5]
; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX803-NEXT:    s_movk_i32 s32, 0x400
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_kern_stack_and_call:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    s_getpc_b64 s[4:5]
; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX900-NEXT:    s_movk_i32 s32, 0x400
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_kern_stack_and_call:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s4, s4, s7
; GFX1010-NEXT:    s_movk_i32 s32, 0x200
; GFX1010-NEXT:    s_addc_u32 s5, s5, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    s_getpc_b64 s[4:5]
; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX1010-NEXT:    s_endpgm
entry:
  %x = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %x, align 4
  tail call void @ex() #0
  ret void
}

; Empty kernel under "frame-pointer"="all" (attribute group #2): s33 is set
; to 0 even though nothing uses it.
define amdgpu_kernel void @test_force_fp_kern_empty() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_empty:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_mov_b32 s33, 0
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_force_fp_kern_empty:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_mov_b32 s33, 0
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_force_fp_kern_empty:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_mov_b32 s33, 0
; GFX1010-NEXT:    s_endpgm
entry:
  ret void
}

; Stack object under "frame-pointer"="all": s33 is set to 0 and the store
; uses s33 as its scalar offset instead of the literal 0.
define amdgpu_kernel void @test_force_fp_kern_stack() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_stack:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s4, s4, s7
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_mov_b32 s33, 0
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    v_mov_b32_e32 v0, 0
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s5
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_force_fp_kern_stack:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_mov_b32 s33, 0
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_force_fp_kern_stack:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s4, s4, s7
; GFX1010-NEXT:    s_mov_b32 s33, 0
; GFX1010-NEXT:    s_addc_u32 s5, s5, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX1010-NEXT:    s_endpgm
entry:
  %x = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %x, align 4
  ret void
}

; Call under "frame-pointer"="all": both s32 and s33 are set to 0 before the
; call.
define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_call:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s4, s4, s7
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s5
; GFX803-NEXT:    s_getpc_b64 s[4:5]
; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX803-NEXT:    s_mov_b32 s32, 0
; GFX803-NEXT:    s_mov_b32 s33, 0
; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_force_fp_kern_call:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    s_getpc_b64 s[4:5]
; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX900-NEXT:    s_mov_b32 s32, 0
; GFX900-NEXT:    s_mov_b32 s33, 0
; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_force_fp_kern_call:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s4, s4, s7
; GFX1010-NEXT:    s_mov_b32 s32, 0
; GFX1010-NEXT:    s_mov_b32 s33, 0
; GFX1010-NEXT:    s_addc_u32 s5, s5, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    s_getpc_b64 s[4:5]
; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX1010-NEXT:    s_endpgm
entry:
  tail call void @ex() #2
  ret void
}

; Stack object plus call under "frame-pointer"="all": s33 is set to 0, the
; store goes through s33, and s32 is set to 0x400 (gfx803, gfx900) or 0x200
; (gfx1010) before the call.
define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_stack_and_call:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s4, s4, s7
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_mov_b32 s33, 0
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    v_mov_b32_e32 v0, 0
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s5
; GFX803-NEXT:    s_getpc_b64 s[4:5]
; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX803-NEXT:    s_movk_i32 s32, 0x400
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_force_fp_kern_stack_and_call:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    s_mov_b32 s33, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    s_getpc_b64 s[4:5]
; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX900-NEXT:    s_movk_i32 s32, 0x400
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_force_fp_kern_stack_and_call:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s4, s4, s7
; GFX1010-NEXT:    s_movk_i32 s32, 0x200
; GFX1010-NEXT:    s_mov_b32 s33, 0
; GFX1010-NEXT:    s_addc_u32 s5, s5, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    s_getpc_b64 s[4:5]
; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX1010-NEXT:    s_endpgm
entry:
  %x = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %x, align 4
  tail call void @ex() #2
  ret void
}

; Spill whose scratch offset is carried in an SGPR. The kernel is limited to
; 8 VGPRs (attribute group #1) and the inline asm clobbers v0-v7, so %a is
; spilled around the asm and reloaded afterwards.
define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
; GFX803-LABEL: test_sgpr_offset_kernel:
; GFX803:       ; %bb.0: ; %entry
; GFX803-NEXT:    s_add_u32 s4, s4, s7
; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT:    s_add_u32 s0, s0, s7
; GFX803-NEXT:    s_addc_u32 s1, s1, 0
; GFX803-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8
; GFX803-NEXT:    s_mov_b32 s4, 0x40000
; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s5
; GFX803-NEXT:    s_waitcnt vmcnt(0)
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; GFX803-NEXT:    ;;#ASMSTART
; GFX803-NEXT:    ;;#ASMEND
; GFX803-NEXT:    s_mov_b32 s4, 0x40000
; GFX803-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; GFX803-NEXT:    s_waitcnt vmcnt(0)
; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
; GFX803-NEXT:    s_endpgm
;
; GFX900-LABEL: test_sgpr_offset_kernel:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT:    s_add_u32 s0, s0, s7
; GFX900-NEXT:    s_addc_u32 s1, s1, 0
; GFX900-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8
; GFX900-NEXT:    s_mov_b32 s6, 0x40000
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s6, 0x40000
; GFX900-NEXT:    buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
; GFX900-NEXT:    s_endpgm
;
; GFX1010-LABEL: test_sgpr_offset_kernel:
; GFX1010:       ; %bb.0: ; %entry
; GFX1010-NEXT:    s_add_u32 s4, s4, s7
; GFX1010-NEXT:    s_addc_u32 s5, s5, 0
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
; GFX1010-NEXT:    s_add_u32 s0, s0, s7
; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
; GFX1010-NEXT:    s_mov_b32 s6, 0x20000
; GFX1010-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8
; GFX1010-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX1010-NEXT:    s_waitcnt_depctr 0xffe3
; GFX1010-NEXT:    s_mov_b32 s6, 0x20000
; GFX1010-NEXT:    ;;#ASMSTART
; GFX1010-NEXT:    ;;#ASMEND
; GFX1010-NEXT:    buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX1010-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
; GFX1010-NEXT:    s_endpgm
entry:
  ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not
  ; fit in the instruction, and has to live in the SGPR offset.
  %alloca = alloca i8, i32 4092, align 4, addrspace(5)
  %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*

  %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
  ; The SGPR holds the 4096-byte spill offset multiplied by the wave size:
  ; 0x40000 / 64 = 4096 (for wave64, the gfx803 and gfx900 runs) and
  ; 0x20000 / 32 = 4096 on the gfx1010 run (wave32 there, presumably the
  ; target default). The autogenerated assertions above pin the exact
  ; s_mov_b32 / buffer_store_dword sequence for each target.
  %a = load volatile i32, i32 addrspace(5)* %aptr

  ; Force %a to spill
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()

  %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
  store volatile i32 %a, i32 addrspace(5)* %outptr

  ret void
}

declare hidden void @ex() local_unnamed_addr #0

attributes #0 = { nounwind }
attributes #1 = { nounwind "amdgpu-num-vgpr"="8" }
attributes #2 = { nounwind "frame-pointer"="all" }