; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s

; Codegen checks for functions that call other functions and/or touch specific
; physical registers through inline asm. Each function is compiled for gfx900
; and gfx1010 and the full expected assembly is recorded below it.
; The check lines are produced by the update script; regenerate them with the
; script instead of editing them by hand.
; NOTE(review): both invocations pass a shared "GCN" prefix, but no check line
; in this file appears to use it -- confirm whether it is still needed.

; External callee used by most tests; only its declaration is visible here.
declare hidden amdgpu_gfx void @external_void_func_void() #0

; Calls the external function twice with a side-effecting inline asm between
; the two calls. The expected output keeps the callee address in s[34:35]
; across both calls and returns through s[4:5], reloaded from lanes of v40.
; NOTE(review): the name mentions "clobber_s30_s31", but the asm here has an
; empty constraint string and declares no clobber -- confirm intent.
define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
; GFX9-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 4
; GFX9-NEXT:    v_writelane_b32 v40, s34, 0
; GFX9-NEXT:    v_writelane_b32 v40, s35, 1
; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    v_readlane_b32 s5, v40, 3
; GFX9-NEXT:    v_readlane_b32 s35, v40, 1
; GFX9-NEXT:    v_readlane_b32 s34, v40, 0
; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
; GFX9-NEXT:    v_readlane_b32 s33, v40, 4
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[4:5]
;
; GFX10-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    v_writelane_b32 v40, s33, 4
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
; GFX10-NEXT:    v_writelane_b32 v40, s34, 0
; GFX10-NEXT:    v_writelane_b32 v40, s35, 1
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s4, v40, 2
; GFX10-NEXT:    v_readlane_b32 s5, v40, 3
; GFX10-NEXT:    v_readlane_b32 s35, v40, 1
; GFX10-NEXT:    v_readlane_b32 s34, v40, 0
; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
; GFX10-NEXT:    v_readlane_b32 s33, v40, 4
; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s6
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[4:5]
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "", ""() #0
  call amdgpu_gfx void @external_void_func_void()
  ret void
}

; Leaf function whose inline asm declares s[30:31] as clobbered. The expected
; output copies s[30:31] into s[4:5] before the asm and returns through s[4:5].
define amdgpu_gfx void @void_func_void_clobber_s30_s31() #1 {
; GFX9-LABEL: void_func_void_clobber_s30_s31:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b64 s[4:5], s[30:31]
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; clobber
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_setpc_b64 s[4:5]
;
; GFX10-LABEL: void_func_void_clobber_s30_s31:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_mov_b64 s[4:5], s[30:31]
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; clobber
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    s_setpc_b64 s[4:5]
  call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
  ret void
}

; Defines a value in s31 via inline asm, calls the external function, then
; consumes the value in s31 again. The expected output parks the value in s34
; across the call and copies it back to s31 afterwards.
; The %out argument is not referenced by the body.
define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)* %out) #0 {
; GFX9-LABEL: test_call_void_func_void_mayclobber_s31:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
; GFX9-NEXT:    v_writelane_b32 v40, s34, 0
; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s31
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_getpc_b64 s[4:5]
; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    s_mov_b32 s34, s31
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT:    v_readlane_b32 s4, v40, 1
; GFX9-NEXT:    s_mov_b32 s31, s34
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s31
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s5, v40, 2
; GFX9-NEXT:    v_readlane_b32 s34, v40, 0
; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[4:5]
;
; GFX10-LABEL: test_call_void_func_void_mayclobber_s31:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[4:5]
; GFX10-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v40, s34, 0
; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def s31
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    s_mov_b32 s34, s31
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT:    v_readlane_b32 s4, v40, 1
; GFX10-NEXT:    s_mov_b32 s31, s34
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s31
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s5, v40, 2
; GFX10-NEXT:    v_readlane_b32 s34, v40, 0
; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s6
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[4:5]
  %s31 = call i32 asm sideeffect "; def $0", "={s31}"()
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "; use $0", "{s31}"(i32 %s31)
  ret void
}

; Same shape as the s31 test, but with a value pinned to v31. The expected
; output copies v31 into v40 before the call and back into v31 after it; v40
; itself is stored to and reloaded from the stack in this function.
; The %out argument is not referenced by the body.
define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1)* %out) #0 {
; GFX9-LABEL: test_call_void_func_void_mayclobber_v31:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    v_writelane_b32 v41, s33, 2
; GFX9-NEXT:    v_writelane_b32 v41, s30, 0
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def v31
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_getpc_b64 s[4:5]
; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    v_writelane_b32 v41, s31, 1
; GFX9-NEXT:    v_mov_b32_e32 v40, v31
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT:    v_mov_b32_e32 v31, v40
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use v31
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    v_readlane_b32 s4, v41, 0
; GFX9-NEXT:    v_readlane_b32 s5, v41, 1
; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
; GFX9-NEXT:    v_readlane_b32 s33, v41, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[4:5]
;
; GFX10-LABEL: test_call_void_func_void_mayclobber_v31:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    v_writelane_b32 v41, s33, 2
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def v31
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_writelane_b32 v41, s30, 0
; GFX10-NEXT:    s_getpc_b64 s[4:5]
; GFX10-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    v_mov_b32_e32 v40, v31
; GFX10-NEXT:    v_writelane_b32 v41, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT:    v_mov_b32_e32 v31, v40
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use v31
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    v_readlane_b32 s4, v41, 0
; GFX10-NEXT:    v_readlane_b32 s5, v41, 1
; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
; GFX10-NEXT:    v_readlane_b32 s33, v41, 2
; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s6
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[4:5]
  %v31 = call i32 asm sideeffect "; def $0", "={v31}"()
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "; use $0", "{v31}"(i32 %v31)
  ret void
}


; Defines a value in s33 via inline asm, calls the external function, then
; consumes the value in s33. In the expected output there is no copy of s33 to
; another register between the def and the use; s33 is also the register this
; function's prologue sets from s32 and restores from v40 in the epilogue.
; The %out argument is not referenced by the body.
define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* %out) #0 {
; GFX9-LABEL: test_call_void_func_void_preserves_s33:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    v_writelane_b32 v40, s33, 0
; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[4:5]
; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s33
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s33
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s4, v40, 1
; GFX9-NEXT:    v_readlane_b32 s33, v40, 0
; GFX9-NEXT:    v_readlane_b32 s5, v40, 2
; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[4:5]
;
; GFX10-LABEL: test_call_void_func_void_preserves_s33:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[4:5]
; GFX10-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v40, s33, 0
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def s33
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s33
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s4, v40, 1
; GFX10-NEXT:    v_readlane_b32 s33, v40, 0
; GFX10-NEXT:    v_readlane_b32 s5, v40, 2
; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s6
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[4:5]
  %s33 = call i32 asm sideeffect "; def $0", "={s33}"()
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "; use $0", "{s33}"(i32 %s33)
  ret void
}

; Defines a value in s34 via inline asm, calls the external function, then
; consumes the value in s34. The expected output keeps the value in s34 across
; the call (no copy to another register); the incoming s34 is saved to lane 0
; of v40 in the prologue and restored in the epilogue.
; The %out argument is not referenced by the body.
define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* %out) #0 {
; GFX9-LABEL: test_call_void_func_void_preserves_s34:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
; GFX9-NEXT:    v_writelane_b32 v40, s34, 0
; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[4:5]
; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s34
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT:    v_readlane_b32 s4, v40, 1
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s34
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s5, v40, 2
; GFX9-NEXT:    v_readlane_b32 s34, v40, 0
; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[4:5]
;
; GFX10-LABEL: test_call_void_func_void_preserves_s34:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[4:5]
; GFX10-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v40, s34, 0
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def s34
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT:    v_readlane_b32 s4, v40, 1
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s34
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s5, v40, 2
; GFX10-NEXT:    v_readlane_b32 s34, v40, 0
; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s6
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[4:5]
  %s34 = call i32 asm sideeffect "; def $0", "={s34}"()
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "; use $0", "{s34}"(i32 %s34)
  ret void
}

; Defines a value in v40 via inline asm, calls the external function, then
; consumes the value in v40. The expected output keeps the value in v40 across
; the call; the incoming v40 is stored to the stack before the def and
; reloaded after the use. v41 holds the lane-saved SGPRs in this function.
; The %out argument is not referenced by the body.
define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* %out) #0 {
; GFX9-LABEL: test_call_void_func_void_preserves_v40:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    v_writelane_b32 v41, s33, 2
; GFX9-NEXT:    v_writelane_b32 v41, s30, 0
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_getpc_b64 s[4:5]
; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    v_writelane_b32 v41, s31, 1
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def v40
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use v40
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    v_readlane_b32 s4, v41, 0
; GFX9-NEXT:    v_readlane_b32 s5, v41, 1
; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
; GFX9-NEXT:    v_readlane_b32 s33, v41, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[4:5]
;
; GFX10-LABEL: test_call_void_func_void_preserves_v40:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    v_writelane_b32 v41, s33, 2
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    s_getpc_b64 s[4:5]
; GFX10-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v41, s30, 0
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def v40
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_writelane_b32 v41, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use v40
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    v_readlane_b32 s4, v41, 0
; GFX10-NEXT:    v_readlane_b32 s5, v41, 1
; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
; GFX10-NEXT:    v_readlane_b32 s33, v41, 2
; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s6
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[4:5]
  %v40 = call i32 asm sideeffect "; def $0", "={v40}"()
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "; use $0", "{v40}"(i32 %v40)
  ret void
}

; Leaf function whose inline asm declares s33 as clobbered. It is defined with
; the default calling convention (no amdgpu_gfx keyword). The expected output
; saves s33 to lane 0 of v0 around the asm and returns through s[30:31].
define hidden void @void_func_void_clobber_s33() #1 {
; GFX9-LABEL: void_func_void_clobber_s33:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_writelane_b32 v0, s33, 0
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; clobber
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s33, v0, 0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s33:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_writelane_b32 v0, s33, 0
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; clobber
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s33, v0, 0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  call void asm sideeffect "; clobber", "~{s33}"() #0
  ret void
}

; Leaf function whose inline asm declares s34 as clobbered. It is defined with
; the default calling convention (no amdgpu_gfx keyword). The expected output
; saves s34 to lane 0 of v0 around the asm and returns through s[30:31].
define hidden void @void_func_void_clobber_s34() #1 {
; GFX9-LABEL: void_func_void_clobber_s34:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_writelane_b32 v0, s34, 0
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; clobber
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s34, v0, 0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s34:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_writelane_b32 v0, s34, 0
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; clobber
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s34, v0, 0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  call void asm sideeffect "; clobber", "~{s34}"() #0
  ret void
}

; Calls @void_func_void_clobber_s33 and returns. The expected output saves
; s33, s30 and s31 into lanes of v40 around the call.
; NOTE(review): the call site uses amdgpu_gfx, but the callee above is defined
; without that calling convention -- confirm this mismatch is intentional.
define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
; GFX9-LABEL: test_call_void_func_void_clobber_s33:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[4:5]
; GFX9-NEXT:    s_add_u32 s4, s4, void_func_void_clobber_s33@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s5, s5, void_func_void_clobber_s33@rel32@hi+12
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[4:5]
;
; GFX10-LABEL: test_call_void_func_void_clobber_s33:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[4:5]
; GFX10-NEXT:    s_add_u32 s4, s4, void_func_void_clobber_s33@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s5, s5, void_func_void_clobber_s33@rel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s6
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[4:5]
  call amdgpu_gfx void @void_func_void_clobber_s33()
  ret void
}

; Calls @void_func_void_clobber_s34 and returns. The expected output saves
; s33, s30 and s31 into lanes of v40 around the call; s34 itself is not saved
; by this caller in the recorded output.
; NOTE(review): the call site uses amdgpu_gfx, but the callee above is defined
; without that calling convention -- confirm this mismatch is intentional.
define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
; GFX9-LABEL: test_call_void_func_void_clobber_s34:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[4:5]
; GFX9-NEXT:    s_add_u32 s4, s4, void_func_void_clobber_s34@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s5, s5, void_func_void_clobber_s34@rel32@hi+12
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[4:5]
;
; GFX10-LABEL: test_call_void_func_void_clobber_s34:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[4:5]
; GFX10-NEXT:    s_add_u32 s4, s4, void_func_void_clobber_s34@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s5, s5, void_func_void_clobber_s34@rel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s6
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[4:5]
  call amdgpu_gfx void @void_func_void_clobber_s34()
  ret void
}

; Defines a value in s40 via inline asm, calls the external function, then
; consumes the value through an unconstrained SGPR operand ("s"). The expected
; output keeps the value in s40 across the call and prints "use s40"; the
; incoming s40 is saved to lane 0 of v40 and restored before returning.
define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
; GFX9-LABEL: callee_saved_sgpr_kernel:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
; GFX9-NEXT:    v_writelane_b32 v40, s40, 0
; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[4:5]
; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s40
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT:    v_readlane_b32 s4, v40, 1
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s40
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s5, v40, 2
; GFX9-NEXT:    v_readlane_b32 s40, v40, 0
; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[4:5]
;
; GFX10-LABEL: callee_saved_sgpr_kernel:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[4:5]
; GFX10-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v40, s40, 0
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def s40
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT:    v_readlane_b32 s4, v40, 1
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s40
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s5, v40, 2
; GFX10-NEXT:    v_readlane_b32 s40, v40, 0
; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s6
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[4:5]
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  ret void
}

; Defines one value in s40 and one in v32 via inline asm, calls the external
; function, then consumes both through unconstrained "s" and "v" operands.
; The expected output keeps the SGPR value in s40 across the call, while the
; VGPR value is copied from v32 into v40 before the call and used from v40
; afterwards ("use v40"). v41 holds the lane-saved SGPRs in this function.
define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
; GFX9-LABEL: callee_saved_sgpr_vgpr_kernel:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    v_writelane_b32 v41, s33, 3
; GFX9-NEXT:    v_writelane_b32 v41, s40, 0
; GFX9-NEXT:    v_writelane_b32 v41, s30, 1
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s40
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def v32
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_getpc_b64 s[4:5]
; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    v_writelane_b32 v41, s31, 2
; GFX9-NEXT:    v_mov_b32_e32 v40, v32
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s40
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use v40
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    v_readlane_b32 s4, v41, 1
; GFX9-NEXT:    v_readlane_b32 s5, v41, 2
; GFX9-NEXT:    v_readlane_b32 s40, v41, 0
; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
; GFX9-NEXT:    v_readlane_b32 s33, v41, 3
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[4:5]
;
; GFX10-LABEL: callee_saved_sgpr_vgpr_kernel:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    v_writelane_b32 v41, s33, 3
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[4:5]
; GFX10-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    v_writelane_b32 v41, s40, 0
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def s40
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def v32
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_mov_b32_e32 v40, v32
; GFX10-NEXT:    v_writelane_b32 v41, s30, 1
; GFX10-NEXT:    v_writelane_b32 v41, s31, 2
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s40
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use v40
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    v_readlane_b32 s4, v41, 1
; GFX10-NEXT:    v_readlane_b32 s5, v41, 2
; GFX10-NEXT:    v_readlane_b32 s40, v41, 0
; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
; GFX10-NEXT:    v_readlane_b32 s33, v41, 3
; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s6
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[4:5]
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  %v32 = call i32 asm sideeffect "; def v32", "={v32}"() #0
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  call void asm sideeffect "; use $0", "v"(i32 %v32) #0
  ret void
}

; Attribute group 0 is used on callers and on the inline-asm call sites;
; group 1 additionally marks a function noinline.
attributes #0 = { nounwind }
attributes #1 = { nounwind noinline }