; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s

; Each kernel below has an empty body and differs only in its
; "amdgpu-waves-per-eu" attribute; the directives in front of each kernel pin
; the register block counts and per-wave register counts expected for it.

; Exactly 1 wave per execution unit (attribute value "1,1").
; CHECK-LABEL: {{^}}empty_exactly_1:
; CHECK: SGPRBlocks: 12
; CHECK: VGPRBlocks: 32
; CHECK: NumSGPRsForWavesPerEU: 102
; CHECK: NumVGPRsForWavesPerEU: 129
define amdgpu_kernel void @empty_exactly_1() #0 {
entry:
  ret void
}
attributes #0 = { "amdgpu-waves-per-eu"="1,1" }

; Exactly 5 waves per execution unit (attribute value "5,5").
; CHECK-LABEL: {{^}}empty_exactly_5:
; CHECK: SGPRBlocks: 12
; CHECK: VGPRBlocks: 10
; CHECK: NumSGPRsForWavesPerEU: 102
; CHECK: NumVGPRsForWavesPerEU: 41
define amdgpu_kernel void @empty_exactly_5() #1 {
entry:
  ret void
}
attributes #1 = { "amdgpu-waves-per-eu"="5,5" }

; Exactly 10 waves per execution unit (attribute value "10,10").
; CHECK-LABEL: {{^}}empty_exactly_10:
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define amdgpu_kernel void @empty_exactly_10() #2 {
entry:
  ret void
}
attributes #2 = { "amdgpu-waves-per-eu"="10,10" }

; At least 1 wave per execution unit (single value "1", no upper bound given).
; CHECK-LABEL: {{^}}empty_at_least_1:
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define amdgpu_kernel void @empty_at_least_1() #3 {
entry:
  ret void
}
attributes #3 = { "amdgpu-waves-per-eu"="1" }

; At least 5 waves per execution unit (single value "5", no upper bound given).
; CHECK-LABEL: {{^}}empty_at_least_5:
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define amdgpu_kernel void @empty_at_least_5() #4 {
entry:
  ret void
}
attributes #4 = { "amdgpu-waves-per-eu"="5" }

; At least 10 waves per execution unit.
; CHECK-LABEL: {{^}}empty_at_least_10:
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define amdgpu_kernel void @empty_at_least_10() #5 {
entry:
  ret void
}
; Single value "10": only the lower bound is given.
attributes #5 = { "amdgpu-waves-per-eu"="10" }

; At most 1 wave per execution unit (same as @empty_exactly_1).

; At most 5 waves per execution unit (attribute value "1,5").
; CHECK-LABEL: {{^}}empty_at_most_5:
; CHECK: SGPRBlocks: 12
; CHECK: VGPRBlocks: 10
; CHECK: NumSGPRsForWavesPerEU: 102
; CHECK: NumVGPRsForWavesPerEU: 41
define amdgpu_kernel void @empty_at_most_5() #6 {
entry:
  ret void
}
attributes #6 = { "amdgpu-waves-per-eu"="1,5" }

; At most 10 waves per execution unit (attribute value "1,10").
; CHECK-LABEL: {{^}}empty_at_most_10:
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define amdgpu_kernel void @empty_at_most_10() #7 {
entry:
  ret void
}
attributes #7 = { "amdgpu-waves-per-eu"="1,10" }

; Between 1 and 5 waves per execution unit (same as @empty_at_most_5).

; Between 5 and 10 waves per execution unit (attribute value "5,10").
; CHECK-LABEL: {{^}}empty_between_5_and_10:
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define amdgpu_kernel void @empty_between_5_and_10() #8 {
entry:
  ret void
}
attributes #8 = { "amdgpu-waves-per-eu"="5,10" }

; Float global in address space 1, zero-initialized; it is the target of the
; volatile loads and stores in @exactly_10.
@var = addrspace(1) global float 0.0

; Exactly 10 waves per execution unit.
; CHECK-LABEL: {{^}}exactly_10:
; CHECK: SGPRBlocks: 1
; CHECK: VGPRBlocks: 5
; CHECK: NumSGPRsForWavesPerEU: 12
; CHECK: NumVGPRsForWavesPerEU: 24
;
; Non-empty kernel with attribute value "10,10": it reads @var 31 times and
; then writes every loaded value back in the same order. All accesses are
; volatile.
; NOTE(review): the load-all-then-store-all shape presumably exists to create
; register pressure under the wave limit -- confirm against the backend.
define amdgpu_kernel void @exactly_10() #9 {
  ; Phase 1: 31 volatile loads from @var.
  %v0 = load volatile float, float addrspace(1)* @var
  %v1 = load volatile float, float addrspace(1)* @var
  %v2 = load volatile float, float addrspace(1)* @var
  %v3 = load volatile float, float addrspace(1)* @var
  %v4 = load volatile float, float addrspace(1)* @var
  %v5 = load volatile float, float addrspace(1)* @var
  %v6 = load volatile float, float addrspace(1)* @var
  %v7 = load volatile float, float addrspace(1)* @var
  %v8 = load volatile float, float addrspace(1)* @var
  %v9 = load volatile float, float addrspace(1)* @var
  %v10 = load volatile float, float addrspace(1)* @var
  %v11 = load volatile float, float addrspace(1)* @var
  %v12 = load volatile float, float addrspace(1)* @var
  %v13 = load volatile float, float addrspace(1)* @var
  %v14 = load volatile float, float addrspace(1)* @var
  %v15 = load volatile float, float addrspace(1)* @var
  %v16 = load volatile float, float addrspace(1)* @var
  %v17 = load volatile float, float addrspace(1)* @var
  %v18 = load volatile float, float addrspace(1)* @var
  %v19 = load volatile float, float addrspace(1)* @var
  %v20 = load volatile float, float addrspace(1)* @var
  %v21 = load volatile float, float addrspace(1)* @var
  %v22 = load volatile float, float addrspace(1)* @var
  %v23 = load volatile float, float addrspace(1)* @var
  %v24 = load volatile float, float addrspace(1)* @var
  %v25 = load volatile float, float addrspace(1)* @var
  %v26 = load volatile float, float addrspace(1)* @var
  %v27 = load volatile float, float addrspace(1)* @var
  %v28 = load volatile float, float addrspace(1)* @var
  %v29 = load volatile float, float addrspace(1)* @var
  %v30 = load volatile float, float addrspace(1)* @var

  ; Phase 2: store each loaded value back to @var, in load order.
  store volatile float %v0, float addrspace(1)* @var
  store volatile float %v1, float addrspace(1)* @var
  store volatile float %v2, float addrspace(1)* @var
  store volatile float %v3, float addrspace(1)* @var
  store volatile float %v4, float addrspace(1)* @var
  store volatile float %v5, float addrspace(1)* @var
  store volatile float %v6, float addrspace(1)* @var
  store volatile float %v7, float addrspace(1)* @var
  store volatile float %v8, float addrspace(1)* @var
  store volatile float %v9, float addrspace(1)* @var
  store volatile float %v10, float addrspace(1)* @var
  store volatile float %v11, float addrspace(1)* @var
  store volatile float %v12, float addrspace(1)* @var
  store volatile float %v13, float addrspace(1)* @var
  store volatile float %v14, float addrspace(1)* @var
  store volatile float %v15, float addrspace(1)* @var
  store volatile float %v16, float addrspace(1)* @var
  store volatile float %v17, float addrspace(1)* @var
  store volatile float %v18, float addrspace(1)* @var
  store volatile float %v19, float addrspace(1)* @var
  store volatile float %v20, float addrspace(1)* @var
  store volatile float %v21, float addrspace(1)* @var
  store volatile float %v22, float addrspace(1)* @var
  store volatile float %v23, float addrspace(1)* @var
  store volatile float %v24, float addrspace(1)* @var
  store volatile float %v25, float addrspace(1)* @var
  store volatile float %v26, float addrspace(1)* @var
  store volatile float %v27, float addrspace(1)* @var
  store volatile float %v28, float addrspace(1)* @var
  store volatile float %v29, float addrspace(1)* @var
  store volatile float %v30, float addrspace(1)* @var

  ret void
}
attributes #9 = { "amdgpu-waves-per-eu"="10,10" }

; Exactly 256 workitems and exactly 2 waves.
; CHECK-LABEL: {{^}}empty_workitems_exactly_256_waves_exactly_2:
; CHECK: SGPRBlocks: 12
; CHECK: VGPRBlocks: 21
; CHECK: NumSGPRsForWavesPerEU: 102
; CHECK: NumVGPRsForWavesPerEU: 85
define amdgpu_kernel void @empty_workitems_exactly_256_waves_exactly_2() #10 {
entry:
  ret void
}
; Combines a flat work-group size of "256,256" with a waves-per-EU value of
; "2,2" on the same empty kernel.
attributes #10 = { "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2,2" }