; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FLATSCR %s

; Checks which registers are saved, restored or left untouched around calls
; when interprocedural register allocation is disabled (-enable-ipra=0).
; The first three invocations share one prefix pair; the fourth adds
; -amdgpu-enable-flat-scratch and uses its own prefix wherever the expected
; scratch instructions or SGPR numbers differ.

declare hidden void @external_void_func_void() #0

; Kernel making two calls to the same external function with an empty inline
; asm in between. The call target is expected in s[34:35] and the second
; s_swappc_b64 must directly follow the asm markers, reusing that pair.
; GCN-LABEL: {{^}}test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GCN: s_getpc_b64 s[34:35]
; GCN-NEXT: s_add_u32 s34, s34,
; GCN-NEXT: s_addc_u32 s35, s35,
; GCN-NEXT: s_mov_b32 s32, 0
; GCN: s_swappc_b64 s[30:31], s[34:35]

; GCN-NEXT: #ASMSTART
; GCN-NEXT: #ASMEND
; GCN-NEXT: s_swappc_b64 s[30:31], s[34:35]
define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
  call void @external_void_func_void()
  call void asm sideeffect "", ""() #0
  call void @external_void_func_void()
  ret void
}

; Same body in a non-kernel function. Expects a scratch store before s33,
; s34, s35, s30 and s31 are written into lanes of v40, the two calls adjacent
; to the asm markers, and the lanes read back before the final s_setpc_b64
; (lanes 2 and 3 into s[4:5] or s[0:1] depending on the configuration).
; GCN-LABEL: {{^}}test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; MUBUF: buffer_store_dword
; FLATSCR: scratch_store_dword
; GCN: v_writelane_b32 v40, s33, 4
; GCN: v_writelane_b32 v40, s34, 0
; GCN: v_writelane_b32 v40, s35, 1
; GCN: v_writelane_b32 v40, s30, 2
; GCN: v_writelane_b32 v40, s31, 3

; GCN: s_swappc_b64
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_swappc_b64
; MUBUF-DAG: v_readlane_b32 s4, v40, 2
; MUBUF-DAG: v_readlane_b32 s5, v40, 3
; FLATSCR-DAG: v_readlane_b32 s0, v40, 2
; FLATSCR-DAG: v_readlane_b32 s1, v40, 3
; GCN: v_readlane_b32 s35, v40, 1
; GCN: v_readlane_b32 s34, v40, 0

; GCN: v_readlane_b32 s33, v40, 4
; MUBUF: buffer_load_dword
; FLATSCR: scratch_load_dword
; GCN: s_setpc_b64
define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
  call void @external_void_func_void()
  call void asm sideeffect "", ""() #0
  call void @external_void_func_void()
  ret void
}

; Two back-to-back calls from a non-kernel function. Expects v40 stored to
; scratch, s33 saved in lane 4 and then set from s32, s32 advanced by 0x400
; or 16 depending on the configuration, and the two s_swappc_b64 adjacent.
; GCN-LABEL: {{^}}test_func_call_external_void_funcx2:
; MUBUF: buffer_store_dword v40
; FLATSCR: scratch_store_dword off, v40
; GCN: v_writelane_b32 v40, s33, 4

; GCN: s_mov_b32 s33, s32
; MUBUF: s_add_u32 s32, s32, 0x400
; FLATSCR: s_add_u32 s32, s32, 16
; GCN: s_swappc_b64
; GCN-NEXT: s_swappc_b64

; GCN: v_readlane_b32 s33, v40, 4
; MUBUF: buffer_load_dword v40
; FLATSCR: scratch_load_dword v40
define void @test_func_call_external_void_funcx2() #0 {
  call void @external_void_func_void()
  call void @external_void_func_void()
  ret void
}

; Inline asm clobbers s[30:31]. Expects the incoming pair to be copied to
; another SGPR pair before the asm and that copy used for s_setpc_b64.
; GCN-LABEL: {{^}}void_func_void_clobber_s30_s31:
; GCN: s_waitcnt
; GCN-NEXT: s_mov_b64 [[SAVEPC:s\[[0-9]+:[0-9]+\]]], s[30:31]
; GCN-NEXT: #ASMSTART
; GCN: ; clobber
; GCN-NEXT: #ASMEND
; GCN-NEXT: s_setpc_b64 [[SAVEPC]]
define void @void_func_void_clobber_s30_s31() #2 {
  call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
  ret void
}

; Callee whose inline asm clobbers vcc. Only the asm markers are expected
; between the entry s_waitcnt and the return.
; GCN-LABEL: {{^}}void_func_void_clobber_vcc:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_setpc_b64 s[30:31]
define hidden void @void_func_void_clobber_vcc() #2 {
  call void asm sideeffect "", "~{vcc}"() #0
  ret void
}

; vcc is defined before the call and used after it. Expects vcc copied to
; s[34:35] immediately before s_swappc_b64 and copied back afterwards.
; GCN-LABEL: {{^}}test_call_void_func_void_clobber_vcc:
; GCN: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN: s_mov_b64 s[34:35], vcc
; GCN-NEXT: s_swappc_b64
; GCN: s_mov_b64 vcc, s[34:35]
define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(i32 addrspace(1)* %out) #0 {
  %vcc = call i64 asm sideeffect "; def $0", "={vcc}"()
  call void @void_func_void_clobber_vcc()
  %val0 = load volatile i32, i32 addrspace(1)* undef
  %val1 = load volatile i32, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "{vcc}"(i64 %vcc)
  ret void
}

; A value pinned to s31 is live across a call. Expects it moved to s33 right
; before s_swappc_b64 and moved back right after.
; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_s31:
; GCN: s_mov_b32 s33, s31
; GCN-NEXT: s_swappc_b64
; GCN-NEXT: s_mov_b32 s31, s33
define amdgpu_kernel void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)* %out) #0 {
  %s31 = call i32 asm sideeffect "; def $0", "={s31}"()
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "{s31}"(i32 %s31)
  ret void
}

; A value pinned to v31 is live across a call. Expects it moved to v40 right
; before s_swappc_b64 and moved back right after.
; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_v31:
; GCN: v_mov_b32_e32 v40, v31
; GCN-NEXT: s_swappc_b64
; GCN-NEXT: v_mov_b32_e32 v31, v40
define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1)* %out) #0 {
  %v31 = call i32 asm sideeffect "; def $0", "={v31}"()
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "{v31}"(i32 %v31)
  ret void
}

; FIXME: What is the expected behavior for reserved registers here?

; A value pinned to s33 is defined before the call and used after it. After
; the use, s33 must not be mentioned again before s_endpgm.
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
; MUBUF: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; FLATSCR: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GCN: s_mov_b32 s32, 0
; GCN: #ASMSTART
; GCN-NEXT: ; def s33
; GCN-NEXT: #ASMEND
; MUBUF: s_swappc_b64 s[30:31], s[4:5]
; FLATSCR: s_swappc_b64 s[30:31], s[0:1]
; GCN: ;;#ASMSTART
; GCN-NEXT: ; use s33
; GCN-NEXT: ;;#ASMEND
; GCN-NOT: s33
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* %out) #0 {
  %s33 = call i32 asm sideeffect "; def $0", "={s33}"()
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "{s33}"(i32 %s33)
  ret void
}

; A value pinned to s34 is defined before the call and used after it. Apart
; from the asm def and use themselves, s34 must not appear anywhere, i.e. the
; caller emits no copy or spill of it around the call.
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s34: {{.*}}
; GCN-NOT: s34

; MUBUF: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; FLATSCR: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GCN: s_mov_b32 s32, 0

; GCN-NOT: s34
; GCN: ;;#ASMSTART
; GCN-NEXT: ; def s34
; GCN-NEXT: ;;#ASMEND

; GCN-NOT: s34
; MUBUF: s_swappc_b64 s[30:31], s[4:5]
; FLATSCR: s_swappc_b64 s[30:31], s[0:1]

; GCN-NOT: s34

; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s34
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* %out) #0 {
  %s34 = call i32 asm sideeffect "; def $0", "={s34}"()
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "{s34}"(i32 %s34)
  ret void
}

; Vector-register counterpart of the s34 test above: a value pinned to v40 is
; live across the call and v40 must not appear outside the asm def and use.
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_v40: {{.*}}

; GCN-NOT: v40
; MUBUF: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; FLATSCR: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GCN: s_mov_b32 s32, 0
; GCN-NOT: v40

; GCN: ;;#ASMSTART
; GCN-NEXT: ; def v40
; GCN-NEXT: ;;#ASMEND

; MUBUF: s_swappc_b64 s[30:31], s[4:5]
; FLATSCR: s_swappc_b64 s[30:31], s[0:1]

; GCN-NOT: v40

; GCN: ;;#ASMSTART
; GCN-NEXT: ; use v40
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* %out) #0 {
  %v40 = call i32 asm sideeffect "; def $0", "={v40}"()
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "{v40}"(i32 %v40)
  ret void
}

; Callee whose inline asm clobbers s33. Expects s33 saved to lane 0 of v0
; directly before the asm and restored directly after it.
; GCN-LABEL: {{^}}void_func_void_clobber_s33:
; GCN: v_writelane_b32 v0, s33, 0
; GCN-NEXT: #ASMSTART
; GCN-NEXT: ; clobber
; GCN-NEXT: #ASMEND
; GCN-NEXT: v_readlane_b32 s33, v0, 0
; GCN: s_setpc_b64
define hidden void @void_func_void_clobber_s33() #2 {
  call void asm sideeffect "; clobber", "~{s33}"() #0
  ret void
}

; Callee whose inline asm clobbers s34. Expects s34 saved to lane 0 of v0
; directly before the asm and restored directly after it.
; GCN-LABEL: {{^}}void_func_void_clobber_s34:
; GCN: v_writelane_b32 v0, s34, 0
; GCN-NEXT: #ASMSTART
; GCN-NEXT: ; clobber
; GCN-NEXT: #ASMEND
; GCN-NEXT: v_readlane_b32 s34, v0, 0
; GCN: s_setpc_b64
define hidden void @void_func_void_clobber_s34() #2 {
  call void asm sideeffect "; clobber", "~{s34}"() #0
  ret void
}

; Kernel calling the s33-clobbering callee. Expects the address computation,
; the s32 setup, the call, and then s_endpgm directly after the call.
; GCN-LABEL: {{^}}test_call_void_func_void_clobber_s33:
; GCN: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_mov_b32 s32, 0
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_clobber_s33() #0 {
  call void @void_func_void_clobber_s33()
  ret void
}

; Kernel calling the s34-clobbering callee. Same expectations as the s33
; variant above.
; GCN-LABEL: {{^}}test_call_void_func_void_clobber_s34:
; GCN: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_mov_b32 s32, 0
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_clobber_s34() #0 {
  call void @void_func_void_clobber_s34()
  ret void
}

; Non-kernel function that defines s40 before a call and uses it afterwards.
; Expects s40 written to a lane of v40 before the call and read back after
; the use, with no other mention of s40.
; GCN-LABEL: {{^}}callee_saved_sgpr_func:
; GCN-NOT: s40
; GCN: v_writelane_b32 v40, s40
; GCN: s_swappc_b64
; GCN-NOT: s40
; GCN: ; use s40
; GCN-NOT: s40
; GCN: v_readlane_b32 s40, v40
; GCN-NOT: s40
define void @callee_saved_sgpr_func() #2 {
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  ret void
}

; Kernel version of the test above. s40 may appear only in the asm def and
; use, so no save or restore of it is expected.
; GCN-LABEL: {{^}}callee_saved_sgpr_kernel:
; GCN-NOT: s40
; GCN: ; def s40
; GCN-NOT: s40
; GCN: s_swappc_b64
; GCN-NOT: s40
; GCN: ; use s40
; GCN-NOT: s40
define amdgpu_kernel void @callee_saved_sgpr_kernel() #2 {
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  ret void
}

; First call preserved VGPR is used so it can't be used for SGPR spills.
; GCN-LABEL: {{^}}callee_saved_sgpr_vgpr_func:
; GCN-NOT: s40
; GCN: v_writelane_b32 v41, s40
; GCN: s_swappc_b64
; GCN-NOT: s40
; GCN: ; use s40
; GCN-NOT: s40
; GCN: v_readlane_b32 s40, v41
; GCN-NOT: s40
define void @callee_saved_sgpr_vgpr_func() #2 {
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  %v40 = call i32 asm sideeffect "; def v40", "={v40}"() #0
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  call void asm sideeffect "; use $0", "v"(i32 %v40) #0
  ret void
}

; Kernel with both an SGPR (s40) and a VGPR (v32) value live across the call.
; Only s40 is checked: it may appear in the asm def and use and nowhere else.
; GCN-LABEL: {{^}}callee_saved_sgpr_vgpr_kernel:
; GCN-NOT: s40
; GCN: ; def s40
; GCN-NOT: s40
; GCN: s_swappc_b64
; GCN-NOT: s40
; GCN: ; use s40
; GCN-NOT: s40
define amdgpu_kernel void @callee_saved_sgpr_vgpr_kernel() #2 {
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  %v32 = call i32 asm sideeffect "; def v32", "={v32}"() #0
  call void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  call void asm sideeffect "; use $0", "v"(i32 %v32) #0
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind noinline }