; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VGPR %s
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SMEM %s
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VMEM %s

; ALL-LABEL: {{^}}spill_sgpr_x2:
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:11], m0 ; 8-byte Folded Spill
; SMEM: s_cbranch_scc1

; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:11], m0 ; 8-byte Folded Reload

; SMEM: s_dcache_wb
; SMEM: s_endpgm

; FIXME: Should only need 4 bytes
; SMEM: ScratchSize: 12


; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1

; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x2(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; ALL-LABEL: {{^}}spill_sgpr_x4:
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[12:15], m0 ; 16-byte Folded Spill
; SMEM: s_cbranch_scc1

; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[12:15], m0 ; 16-byte Folded Reload
; SMEM: s_dcache_wb
; SMEM: s_endpgm

; FIXME: Should only need 4 bytes
; SMEM: ScratchSize: 20

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3


; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x4(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<4 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; ALL-LABEL: {{^}}spill_sgpr_x8:

; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Spill
; SMEM: s_add_u32 m0, s3, 0x110{{$}}
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Spill
; SMEM: s_cbranch_scc1

; SMEM: s_add_u32 m0, s3, 0x100{{$}}
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Reload
; SMEM: s_add_u32 m0, s3, 0x110{{$}}
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Reload

; SMEM: s_dcache_wb
; SMEM: s_endpgm

; SMEM: ScratchSize: 36

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7

; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x8(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; FIXME: x16 inlineasm seems broken
; define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
;   %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
;   %cmp = icmp eq i32 %in, 0
;   br i1 %cmp, label %bb0, label %ret

; bb0:
;   call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
;   br label %ret

; ret:
;   ret void
; }

attributes #0 = { nounwind }
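
; A minimal sketch of reproducing the first configuration above by hand,
; outside of lit (assuming an llc built with the AMDGPU target; the input
; and output file names are illustrative):
;   llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 \
;     -verify-machineinstrs spill-wide-sgpr.ll -o spill-wide-sgpr.s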