; RUN: llc -O0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VGPR %s
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VMEM %s

; GCN-LABEL: {{^}}spill_sgpr_x2:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1


; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x2(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x3:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2


; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x3(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <3 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<3 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x4:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3


; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x4(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<4 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x5:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4


; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x5(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <5 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<5 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x8:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x8(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x16:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x32:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 16
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 17
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 18
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 19
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 20
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 21
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 22
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 23
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 24
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 25
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 26
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 27
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 28
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 29
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 30
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 31
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 16
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 17
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 18
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 19
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 20
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 21
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 22
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 23
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 24
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 25
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 26
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 27
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 28
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 29
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 30
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 31

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x32(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <32 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<32 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

attributes #0 = { nounwind }