// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

15 #include "PixelProgram.hpp"
16
17 #include "Constants.hpp"
18 #include "SamplerCore.hpp"
19 #include "Device/Primitive.hpp"
20 #include "Device/Renderer.hpp"
21 #include "Vulkan/VkDevice.hpp"
22
23 namespace sw {
24
PixelProgram(const PixelProcessor::State & state,const vk::PipelineLayout * pipelineLayout,const SpirvShader * spirvShader,const vk::DescriptorSet::Bindings & descriptorSets)25 PixelProgram::PixelProgram(
26 const PixelProcessor::State &state,
27 const vk::PipelineLayout *pipelineLayout,
28 const SpirvShader *spirvShader,
29 const vk::DescriptorSet::Bindings &descriptorSets)
30 : PixelRoutine(state, pipelineLayout, spirvShader, descriptorSets)
31 {
32 }
33
34 // Union all cMask and return it as 4 booleans
maskAny(Int cMask[4],const SampleSet & samples)35 Int4 PixelProgram::maskAny(Int cMask[4], const SampleSet &samples)
36 {
37 // See if at least 1 sample is used
38 Int maskUnion = 0;
39 for(unsigned int q : samples)
40 {
41 maskUnion |= cMask[q];
42 }
43
44 // Convert to 4 booleans
45 Int4 laneBits = Int4(1, 2, 4, 8);
46 Int4 laneShiftsToMSB = Int4(31, 30, 29, 28);
47 Int4 mask(maskUnion);
48 mask = ((mask & laneBits) << laneShiftsToMSB) >> Int4(31);
49 return mask;
50 }
51
52 // Union all cMask/sMask/zMask and return it as 4 booleans
maskAny(Int cMask[4],Int sMask[4],Int zMask[4],const SampleSet & samples)53 Int4 PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples)
54 {
55 // See if at least 1 sample is used
56 Int maskUnion = 0;
57 for(unsigned int q : samples)
58 {
59 maskUnion |= (cMask[q] & sMask[q] & zMask[q]);
60 }
61
62 // Convert to 4 booleans
63 Int4 laneBits = Int4(1, 2, 4, 8);
64 Int4 laneShiftsToMSB = Int4(31, 30, 29, 28);
65 Int4 mask(maskUnion);
66 mask = ((mask & laneBits) << laneShiftsToMSB) >> Int4(31);
67 return mask;
68 }
69
setBuiltins(Int & x,Int & y,Float4 (& z)[4],Float4 & w,Int cMask[4],const SampleSet & samples)70 void PixelProgram::setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4], const SampleSet &samples)
71 {
72 routine.setImmutableInputBuiltins(spirvShader);
73
74 // TODO(b/146486064): Consider only assigning these to the SpirvRoutine iff
75 // they are ever going to be read.
76 float x0 = 0.5f;
77 float y0 = 0.5f;
78 float x1 = 1.5f;
79 float y1 = 1.5f;
80
81 // "When Sample Shading is enabled, the x and y components of FragCoord reflect the
82 // location of one of the samples corresponding to the shader invocation. Otherwise,
83 // the x and y components of FragCoord reflect the location of the center of the fragment."
84 if(state.sampleShadingEnabled && state.multiSampleCount > 1)
85 {
86 x0 = Constants::VkSampleLocations4[samples[0]][0];
87 y0 = Constants::VkSampleLocations4[samples[0]][1];
88 x1 = 1.0f + x0;
89 y1 = 1.0f + y0;
90 }
91
92 routine.fragCoord[0] = SIMD::Float(Float(x)) + SIMD::Float(x0, x1, x0, x1);
93 routine.fragCoord[1] = SIMD::Float(Float(y)) + SIMD::Float(y0, y0, y1, y1);
94 routine.fragCoord[2] = z[0]; // sample 0
95 routine.fragCoord[3] = w;
96
97 routine.invocationsPerSubgroup = SIMD::Width;
98 routine.helperInvocation = ~maskAny(cMask, samples);
99 routine.windowSpacePosition[0] = SIMD::Int(x) + SIMD::Int(0, 1, 0, 1);
100 routine.windowSpacePosition[1] = SIMD::Int(y) + SIMD::Int(0, 0, 1, 1);
101 routine.layer = *Pointer<Int>(data + OFFSET(DrawData, layer));
102
103 // PointCoord formula reference: https://www.khronos.org/registry/vulkan/specs/1.2/html/vkspec.html#primsrast-points-basic
104 // Note we don't add a 0.5 offset to x and y here (like for fragCoord) because pointCoordX/Y have 0.5 subtracted as part of the viewport transform.
105 SIMD::Float pointSizeInv = SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, pointSizeInv)));
106 routine.pointCoord[0] = SIMD::Float(0.5f) + pointSizeInv * (((SIMD::Float(Float(x)) + SIMD::Float(0.0f, 1.0f, 0.0f, 1.0f)) - SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordX)))));
107 routine.pointCoord[1] = SIMD::Float(0.5f) + pointSizeInv * (((SIMD::Float(Float(y)) + SIMD::Float(0.0f, 0.0f, 1.0f, 1.0f)) - SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordY)))));
108
109 routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
110 assert(builtin.SizeInComponents == 1);
111 value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(routine.layer));
112 });
113
114 routine.setInputBuiltin(spirvShader, spv::BuiltInFragCoord, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
115 assert(builtin.SizeInComponents == 4);
116 value[builtin.FirstComponent + 0] = routine.fragCoord[0];
117 value[builtin.FirstComponent + 1] = routine.fragCoord[1];
118 value[builtin.FirstComponent + 2] = routine.fragCoord[2];
119 value[builtin.FirstComponent + 3] = routine.fragCoord[3];
120 });
121
122 routine.setInputBuiltin(spirvShader, spv::BuiltInPointCoord, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
123 assert(builtin.SizeInComponents == 2);
124 value[builtin.FirstComponent + 0] = routine.pointCoord[0];
125 value[builtin.FirstComponent + 1] = routine.pointCoord[1];
126 });
127
128 routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
129 assert(builtin.SizeInComponents == 1);
130 value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width));
131 });
132
133 routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
134 assert(builtin.SizeInComponents == 1);
135 value[builtin.FirstComponent] = As<SIMD::Float>(routine.helperInvocation);
136 });
137 }
138
executeShader(Int cMask[4],Int sMask[4],Int zMask[4],const SampleSet & samples)139 void PixelProgram::executeShader(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples)
140 {
141 routine.device = device;
142 routine.descriptorSets = data + OFFSET(DrawData, descriptorSets);
143 routine.descriptorDynamicOffsets = data + OFFSET(DrawData, descriptorDynamicOffsets);
144 routine.pushConstants = data + OFFSET(DrawData, pushConstants);
145 routine.constants = device + OFFSET(vk::Device, constants);
146
147 auto it = spirvShader->inputBuiltins.find(spv::BuiltInFrontFacing);
148 if(it != spirvShader->inputBuiltins.end())
149 {
150 ASSERT(it->second.SizeInComponents == 1);
151 auto frontFacing = Int4(*Pointer<Int>(primitive + OFFSET(Primitive, clockwiseMask)));
152 routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<Float4>(frontFacing);
153 }
154
155 it = spirvShader->inputBuiltins.find(spv::BuiltInSampleMask);
156 if(it != spirvShader->inputBuiltins.end())
157 {
158 static_assert(SIMD::Width == 4, "Expects SIMD width to be 4");
159 Int4 laneBits = Int4(1, 2, 4, 8);
160
161 Int4 inputSampleMask = 0;
162 for(unsigned int q : samples)
163 {
164 inputSampleMask |= Int4(1 << q) & CmpNEQ(Int4(cMask[q]) & laneBits, Int4(0));
165 }
166
167 routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<Float4>(inputSampleMask);
168 // Sample mask input is an array, as the spec contemplates MSAA levels higher than 32.
169 // Fill any non-zero indices with 0.
170 for(auto i = 1u; i < it->second.SizeInComponents; i++)
171 {
172 routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = Float4(0);
173 }
174 }
175
176 it = spirvShader->inputBuiltins.find(spv::BuiltInSampleId);
177 if(it != spirvShader->inputBuiltins.end())
178 {
179 ASSERT(samples.size() == 1);
180 int sampleId = samples[0];
181 routine.getVariable(it->second.Id)[it->second.FirstComponent] =
182 As<SIMD::Float>(SIMD::Int(sampleId));
183 }
184
185 it = spirvShader->inputBuiltins.find(spv::BuiltInSamplePosition);
186 if(it != spirvShader->inputBuiltins.end())
187 {
188 ASSERT(samples.size() == 1);
189 int sampleId = samples[0];
190 routine.getVariable(it->second.Id)[it->second.FirstComponent + 0] =
191 SIMD::Float((state.multiSampleCount > 1) ? Constants::VkSampleLocations4[sampleId][0] : 0.5f);
192 routine.getVariable(it->second.Id)[it->second.FirstComponent + 1] =
193 SIMD::Float((state.multiSampleCount > 1) ? Constants::VkSampleLocations4[sampleId][1] : 0.5f);
194 }
195
196 // Note: all lanes initially active to facilitate derivatives etc. Actual coverage is
197 // handled separately, through the cMask.
198 auto activeLaneMask = SIMD::Int(0xFFFFFFFF);
199 auto storesAndAtomicsMask = maskAny(cMask, sMask, zMask, samples);
200 routine.discardMask = 0;
201
202 spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets, state.multiSampleCount);
203 spirvShader->emitEpilog(&routine);
204 // At the last invocation of the fragment shader, clear phi data.
205 // TODO(b/178662288): Automatically clear phis through SpirvRoutine lifetime reduction.
206 if(samples[0] == static_cast<int>(state.multiSampleCount - 1))
207 {
208 spirvShader->clearPhis(&routine);
209 }
210
211 for(int i = 0; i < MAX_COLOR_BUFFERS; i++)
212 {
213 c[i].x = routine.outputs[i * 4 + 0];
214 c[i].y = routine.outputs[i * 4 + 1];
215 c[i].z = routine.outputs[i * 4 + 2];
216 c[i].w = routine.outputs[i * 4 + 3];
217 }
218
219 clampColor(c);
220
221 if(spirvShader->getAnalysis().ContainsDiscard)
222 {
223 for(unsigned int q : samples)
224 {
225 cMask[q] &= ~routine.discardMask;
226 }
227 }
228
229 it = spirvShader->outputBuiltins.find(spv::BuiltInSampleMask);
230 if(it != spirvShader->outputBuiltins.end())
231 {
232 auto outputSampleMask = As<SIMD::Int>(routine.getVariable(it->second.Id)[it->second.FirstComponent]);
233
234 for(unsigned int q : samples)
235 {
236 cMask[q] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1 << q), SIMD::Int(0)));
237 }
238 }
239
240 it = spirvShader->outputBuiltins.find(spv::BuiltInFragDepth);
241 if(it != spirvShader->outputBuiltins.end())
242 {
243 for(unsigned int q : samples)
244 {
245 z[q] = routine.getVariable(it->second.Id)[it->second.FirstComponent];
246 }
247 }
248 }
249
alphaTest(Int cMask[4],const SampleSet & samples)250 Bool PixelProgram::alphaTest(Int cMask[4], const SampleSet &samples)
251 {
252 if(!state.alphaToCoverage)
253 {
254 return true;
255 }
256
257 alphaToCoverage(cMask, c[0].w, samples);
258
259 Int pass = 0;
260 for(unsigned int q : samples)
261 {
262 pass = pass | cMask[q];
263 }
264
265 return pass != 0x0;
266 }
267
blendColor(Pointer<Byte> cBuffer[4],Int & x,Int sMask[4],Int zMask[4],Int cMask[4],const SampleSet & samples)268 void PixelProgram::blendColor(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4], const SampleSet &samples)
269 {
270 for(int index = 0; index < MAX_COLOR_BUFFERS; index++)
271 {
272 if(!state.colorWriteActive(index))
273 {
274 continue;
275 }
276
277 auto format = state.colorFormat[index];
278 switch(format)
279 {
280 case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
281 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
282 case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
283 case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
284 case VK_FORMAT_B5G6R5_UNORM_PACK16:
285 case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
286 case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
287 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
288 case VK_FORMAT_R5G6B5_UNORM_PACK16:
289 case VK_FORMAT_B8G8R8A8_UNORM:
290 case VK_FORMAT_B8G8R8A8_SRGB:
291 case VK_FORMAT_R8G8B8A8_UNORM:
292 case VK_FORMAT_R8G8B8A8_SRGB:
293 case VK_FORMAT_R8G8_UNORM:
294 case VK_FORMAT_R8_UNORM:
295 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
296 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
297 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
298 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
299 for(unsigned int q : samples)
300 {
301 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
302
303 Vector4f colorf = alphaBlend(index, buffer, c[index], x);
304
305 Vector4s color;
306 color.x = convertFixed16(colorf.x, true);
307 color.y = convertFixed16(colorf.y, true);
308 color.z = convertFixed16(colorf.z, true);
309 color.w = convertFixed16(colorf.w, true);
310 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
311 }
312 break;
313 case VK_FORMAT_R16_SFLOAT:
314 case VK_FORMAT_R16G16_SFLOAT:
315 case VK_FORMAT_R16G16B16A16_SFLOAT:
316 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
317 case VK_FORMAT_R32_SFLOAT:
318 case VK_FORMAT_R32G32_SFLOAT:
319 case VK_FORMAT_R32G32B32A32_SFLOAT:
320 case VK_FORMAT_R32_SINT:
321 case VK_FORMAT_R32G32_SINT:
322 case VK_FORMAT_R32G32B32A32_SINT:
323 case VK_FORMAT_R32_UINT:
324 case VK_FORMAT_R32G32_UINT:
325 case VK_FORMAT_R32G32B32A32_UINT:
326 case VK_FORMAT_R16_UNORM:
327 case VK_FORMAT_R16G16_UNORM:
328 case VK_FORMAT_R16G16B16A16_UNORM:
329 case VK_FORMAT_R16_SINT:
330 case VK_FORMAT_R16G16_SINT:
331 case VK_FORMAT_R16G16B16A16_SINT:
332 case VK_FORMAT_R16_UINT:
333 case VK_FORMAT_R16G16_UINT:
334 case VK_FORMAT_R16G16B16A16_UINT:
335 case VK_FORMAT_R8_SINT:
336 case VK_FORMAT_R8G8_SINT:
337 case VK_FORMAT_R8G8B8A8_SINT:
338 case VK_FORMAT_R8_UINT:
339 case VK_FORMAT_R8G8_UINT:
340 case VK_FORMAT_R8G8B8A8_UINT:
341 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
342 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
343 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
344 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
345 for(unsigned int q : samples)
346 {
347 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
348
349 Vector4f color = alphaBlend(index, buffer, c[index], x);
350 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
351 }
352 break;
353 default:
354 UNSUPPORTED("VkFormat: %d", int(format));
355 }
356 }
357 }
358
clampColor(Vector4f color[MAX_COLOR_BUFFERS])359 void PixelProgram::clampColor(Vector4f color[MAX_COLOR_BUFFERS])
360 {
361 // "If the color attachment is fixed-point, the components of the source and destination values and blend factors
362 // are each clamped to [0,1] or [-1,1] respectively for an unsigned normalized or signed normalized color attachment
363 // prior to evaluating the blend operations. If the color attachment is floating-point, no clamping occurs."
364
365 for(int index = 0; index < MAX_COLOR_BUFFERS; index++)
366 {
367 if(!state.colorWriteActive(index) && !(index == 0 && state.alphaToCoverage))
368 {
369 continue;
370 }
371
372 switch(state.colorFormat[index])
373 {
374 case VK_FORMAT_UNDEFINED:
375 break;
376 case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
377 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
378 case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
379 case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
380 case VK_FORMAT_B5G6R5_UNORM_PACK16:
381 case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
382 case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
383 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
384 case VK_FORMAT_R5G6B5_UNORM_PACK16:
385 case VK_FORMAT_B8G8R8A8_UNORM:
386 case VK_FORMAT_B8G8R8A8_SRGB:
387 case VK_FORMAT_R8G8B8A8_UNORM:
388 case VK_FORMAT_R8G8B8A8_SRGB:
389 case VK_FORMAT_R8G8_UNORM:
390 case VK_FORMAT_R8_UNORM:
391 case VK_FORMAT_R16_UNORM:
392 case VK_FORMAT_R16G16_UNORM:
393 case VK_FORMAT_R16G16B16A16_UNORM:
394 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
395 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
396 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
397 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
398 color[index].x = Min(Max(color[index].x, Float4(0.0f)), Float4(1.0f));
399 color[index].y = Min(Max(color[index].y, Float4(0.0f)), Float4(1.0f));
400 color[index].z = Min(Max(color[index].z, Float4(0.0f)), Float4(1.0f));
401 color[index].w = Min(Max(color[index].w, Float4(0.0f)), Float4(1.0f));
402 break;
403 case VK_FORMAT_R32_SFLOAT:
404 case VK_FORMAT_R32G32_SFLOAT:
405 case VK_FORMAT_R32G32B32A32_SFLOAT:
406 case VK_FORMAT_R32_SINT:
407 case VK_FORMAT_R32G32_SINT:
408 case VK_FORMAT_R32G32B32A32_SINT:
409 case VK_FORMAT_R32_UINT:
410 case VK_FORMAT_R32G32_UINT:
411 case VK_FORMAT_R32G32B32A32_UINT:
412 case VK_FORMAT_R16_SFLOAT:
413 case VK_FORMAT_R16G16_SFLOAT:
414 case VK_FORMAT_R16G16B16A16_SFLOAT:
415 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
416 case VK_FORMAT_R16_SINT:
417 case VK_FORMAT_R16G16_SINT:
418 case VK_FORMAT_R16G16B16A16_SINT:
419 case VK_FORMAT_R16_UINT:
420 case VK_FORMAT_R16G16_UINT:
421 case VK_FORMAT_R16G16B16A16_UINT:
422 case VK_FORMAT_R8_SINT:
423 case VK_FORMAT_R8G8_SINT:
424 case VK_FORMAT_R8G8B8A8_SINT:
425 case VK_FORMAT_R8_UINT:
426 case VK_FORMAT_R8G8_UINT:
427 case VK_FORMAT_R8G8B8A8_UINT:
428 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
429 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
430 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
431 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
432 break;
433 default:
434 UNSUPPORTED("VkFormat: %d", int(state.colorFormat[index]));
435 }
436 }
437 }
438
439 } // namespace sw
440