; RUN: llc -O0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VGPR %s
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VMEM %s

; Check SGPR spilling of wide SGPR tuples (2..32 dwords) at -O0, both in the
; default spill-to-VGPR-lane mode (VGPR prefix: one v_writelane_b32 /
; v_readlane_b32 pair per dword) and with -amdgpu-spill-sgpr-to-vgpr=0,
; which forces spills through scratch memory (VMEM prefix:
; buffer_store_dword / buffer_load_dword).
; Each kernel defines a wide SGPR value via inline asm, branches, and uses
; the value on one path so the register must live across the branch.

; GCN-LABEL: {{^}}spill_sgpr_x2:
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x2(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x3:
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x3(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <3 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<3 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x4:
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x4(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<4 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x5:
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x5(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <5 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<5 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x8:
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x8(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x16:
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x32:
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 16
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 17
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 18
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 19
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 20
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 21
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 22
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 23
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 24
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 25
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 26
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 27
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 28
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 29
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 30
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 31
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 16
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 17
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 18
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 19
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 20
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 21
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 22
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 23
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 24
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 25
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 26
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 27
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 28
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 29
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 30
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 31

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x32(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <32 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<32 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

attributes #0 = { nounwind }