• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "PixelProgram.hpp"
16 
17 #include "Constants.hpp"
18 #include "SamplerCore.hpp"
19 #include "Device/Primitive.hpp"
20 #include "Device/Renderer.hpp"
21 #include "Vulkan/VkDevice.hpp"
22 
23 namespace sw {
24 
PixelProgram(const PixelProcessor::State & state,const vk::PipelineLayout * pipelineLayout,const SpirvShader * spirvShader,const vk::DescriptorSet::Bindings & descriptorSets)25 PixelProgram::PixelProgram(
26     const PixelProcessor::State &state,
27     const vk::PipelineLayout *pipelineLayout,
28     const SpirvShader *spirvShader,
29     const vk::DescriptorSet::Bindings &descriptorSets)
30     : PixelRoutine(state, pipelineLayout, spirvShader, descriptorSets)
31 {
32 }
33 
34 // Union all cMask and return it as 4 booleans
maskAny(Int cMask[4],const SampleSet & samples)35 Int4 PixelProgram::maskAny(Int cMask[4], const SampleSet &samples)
36 {
37 	// See if at least 1 sample is used
38 	Int maskUnion = 0;
39 	for(unsigned int q : samples)
40 	{
41 		maskUnion |= cMask[q];
42 	}
43 
44 	// Convert to 4 booleans
45 	Int4 laneBits = Int4(1, 2, 4, 8);
46 	Int4 laneShiftsToMSB = Int4(31, 30, 29, 28);
47 	Int4 mask(maskUnion);
48 	mask = ((mask & laneBits) << laneShiftsToMSB) >> Int4(31);
49 	return mask;
50 }
51 
52 // Union all cMask/sMask/zMask and return it as 4 booleans
maskAny(Int cMask[4],Int sMask[4],Int zMask[4],const SampleSet & samples)53 Int4 PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples)
54 {
55 	// See if at least 1 sample is used
56 	Int maskUnion = 0;
57 	for(unsigned int q : samples)
58 	{
59 		maskUnion |= (cMask[q] & sMask[q] & zMask[q]);
60 	}
61 
62 	// Convert to 4 booleans
63 	Int4 laneBits = Int4(1, 2, 4, 8);
64 	Int4 laneShiftsToMSB = Int4(31, 30, 29, 28);
65 	Int4 mask(maskUnion);
66 	mask = ((mask & laneBits) << laneShiftsToMSB) >> Int4(31);
67 	return mask;
68 }
69 
setBuiltins(Int & x,Int & y,Float4 (& z)[4],Float4 & w,Int cMask[4],const SampleSet & samples)70 void PixelProgram::setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4], const SampleSet &samples)
71 {
72 	routine.setImmutableInputBuiltins(spirvShader);
73 
74 	// TODO(b/146486064): Consider only assigning these to the SpirvRoutine iff
75 	// they are ever going to be read.
76 	float x0 = 0.5f;
77 	float y0 = 0.5f;
78 	float x1 = 1.5f;
79 	float y1 = 1.5f;
80 
81 	// "When Sample Shading is enabled, the x and y components of FragCoord reflect the
82 	//  location of one of the samples corresponding to the shader invocation. Otherwise,
83 	//  the x and y components of FragCoord reflect the location of the center of the fragment."
84 	if(state.sampleShadingEnabled && state.multiSampleCount > 1)
85 	{
86 		x0 = Constants::VkSampleLocations4[samples[0]][0];
87 		y0 = Constants::VkSampleLocations4[samples[0]][1];
88 		x1 = 1.0f + x0;
89 		y1 = 1.0f + y0;
90 	}
91 
92 	routine.fragCoord[0] = SIMD::Float(Float(x)) + SIMD::Float(x0, x1, x0, x1);
93 	routine.fragCoord[1] = SIMD::Float(Float(y)) + SIMD::Float(y0, y0, y1, y1);
94 	routine.fragCoord[2] = z[0];  // sample 0
95 	routine.fragCoord[3] = w;
96 
97 	routine.invocationsPerSubgroup = SIMD::Width;
98 	routine.helperInvocation = ~maskAny(cMask, samples);
99 	routine.windowSpacePosition[0] = SIMD::Int(x) + SIMD::Int(0, 1, 0, 1);
100 	routine.windowSpacePosition[1] = SIMD::Int(y) + SIMD::Int(0, 0, 1, 1);
101 	routine.layer = *Pointer<Int>(data + OFFSET(DrawData, layer));
102 
103 	// PointCoord formula reference: https://www.khronos.org/registry/vulkan/specs/1.2/html/vkspec.html#primsrast-points-basic
104 	// Note we don't add a 0.5 offset to x and y here (like for fragCoord) because pointCoordX/Y have 0.5 subtracted as part of the viewport transform.
105 	SIMD::Float pointSizeInv = SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, pointSizeInv)));
106 	routine.pointCoord[0] = SIMD::Float(0.5f) + pointSizeInv * (((SIMD::Float(Float(x)) + SIMD::Float(0.0f, 1.0f, 0.0f, 1.0f)) - SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordX)))));
107 	routine.pointCoord[1] = SIMD::Float(0.5f) + pointSizeInv * (((SIMD::Float(Float(y)) + SIMD::Float(0.0f, 0.0f, 1.0f, 1.0f)) - SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordY)))));
108 
109 	routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
110 		assert(builtin.SizeInComponents == 1);
111 		value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(routine.layer));
112 	});
113 
114 	routine.setInputBuiltin(spirvShader, spv::BuiltInFragCoord, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
115 		assert(builtin.SizeInComponents == 4);
116 		value[builtin.FirstComponent + 0] = routine.fragCoord[0];
117 		value[builtin.FirstComponent + 1] = routine.fragCoord[1];
118 		value[builtin.FirstComponent + 2] = routine.fragCoord[2];
119 		value[builtin.FirstComponent + 3] = routine.fragCoord[3];
120 	});
121 
122 	routine.setInputBuiltin(spirvShader, spv::BuiltInPointCoord, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
123 		assert(builtin.SizeInComponents == 2);
124 		value[builtin.FirstComponent + 0] = routine.pointCoord[0];
125 		value[builtin.FirstComponent + 1] = routine.pointCoord[1];
126 	});
127 
128 	routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
129 		assert(builtin.SizeInComponents == 1);
130 		value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width));
131 	});
132 
133 	routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const SpirvShader::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
134 		assert(builtin.SizeInComponents == 1);
135 		value[builtin.FirstComponent] = As<SIMD::Float>(routine.helperInvocation);
136 	});
137 }
138 
executeShader(Int cMask[4],Int sMask[4],Int zMask[4],const SampleSet & samples)139 void PixelProgram::executeShader(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples)
140 {
141 	routine.device = device;
142 	routine.descriptorSets = data + OFFSET(DrawData, descriptorSets);
143 	routine.descriptorDynamicOffsets = data + OFFSET(DrawData, descriptorDynamicOffsets);
144 	routine.pushConstants = data + OFFSET(DrawData, pushConstants);
145 	routine.constants = device + OFFSET(vk::Device, constants);
146 
147 	auto it = spirvShader->inputBuiltins.find(spv::BuiltInFrontFacing);
148 	if(it != spirvShader->inputBuiltins.end())
149 	{
150 		ASSERT(it->second.SizeInComponents == 1);
151 		auto frontFacing = Int4(*Pointer<Int>(primitive + OFFSET(Primitive, clockwiseMask)));
152 		routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<Float4>(frontFacing);
153 	}
154 
155 	it = spirvShader->inputBuiltins.find(spv::BuiltInSampleMask);
156 	if(it != spirvShader->inputBuiltins.end())
157 	{
158 		static_assert(SIMD::Width == 4, "Expects SIMD width to be 4");
159 		Int4 laneBits = Int4(1, 2, 4, 8);
160 
161 		Int4 inputSampleMask = 0;
162 		for(unsigned int q : samples)
163 		{
164 			inputSampleMask |= Int4(1 << q) & CmpNEQ(Int4(cMask[q]) & laneBits, Int4(0));
165 		}
166 
167 		routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<Float4>(inputSampleMask);
168 		// Sample mask input is an array, as the spec contemplates MSAA levels higher than 32.
169 		// Fill any non-zero indices with 0.
170 		for(auto i = 1u; i < it->second.SizeInComponents; i++)
171 		{
172 			routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = Float4(0);
173 		}
174 	}
175 
176 	it = spirvShader->inputBuiltins.find(spv::BuiltInSampleId);
177 	if(it != spirvShader->inputBuiltins.end())
178 	{
179 		ASSERT(samples.size() == 1);
180 		int sampleId = samples[0];
181 		routine.getVariable(it->second.Id)[it->second.FirstComponent] =
182 		    As<SIMD::Float>(SIMD::Int(sampleId));
183 	}
184 
185 	it = spirvShader->inputBuiltins.find(spv::BuiltInSamplePosition);
186 	if(it != spirvShader->inputBuiltins.end())
187 	{
188 		ASSERT(samples.size() == 1);
189 		int sampleId = samples[0];
190 		routine.getVariable(it->second.Id)[it->second.FirstComponent + 0] =
191 		    SIMD::Float((state.multiSampleCount > 1) ? Constants::VkSampleLocations4[sampleId][0] : 0.5f);
192 		routine.getVariable(it->second.Id)[it->second.FirstComponent + 1] =
193 		    SIMD::Float((state.multiSampleCount > 1) ? Constants::VkSampleLocations4[sampleId][1] : 0.5f);
194 	}
195 
196 	// Note: all lanes initially active to facilitate derivatives etc. Actual coverage is
197 	// handled separately, through the cMask.
198 	auto activeLaneMask = SIMD::Int(0xFFFFFFFF);
199 	auto storesAndAtomicsMask = maskAny(cMask, sMask, zMask, samples);
200 	routine.discardMask = 0;
201 
202 	spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets, state.multiSampleCount);
203 	spirvShader->emitEpilog(&routine);
204 	// At the last invocation of the fragment shader, clear phi data.
205 	// TODO(b/178662288): Automatically clear phis through SpirvRoutine lifetime reduction.
206 	if(samples[0] == static_cast<int>(state.multiSampleCount - 1))
207 	{
208 		spirvShader->clearPhis(&routine);
209 	}
210 
211 	for(int i = 0; i < MAX_COLOR_BUFFERS; i++)
212 	{
213 		c[i].x = routine.outputs[i * 4 + 0];
214 		c[i].y = routine.outputs[i * 4 + 1];
215 		c[i].z = routine.outputs[i * 4 + 2];
216 		c[i].w = routine.outputs[i * 4 + 3];
217 	}
218 
219 	clampColor(c);
220 
221 	if(spirvShader->getAnalysis().ContainsDiscard)
222 	{
223 		for(unsigned int q : samples)
224 		{
225 			cMask[q] &= ~routine.discardMask;
226 		}
227 	}
228 
229 	it = spirvShader->outputBuiltins.find(spv::BuiltInSampleMask);
230 	if(it != spirvShader->outputBuiltins.end())
231 	{
232 		auto outputSampleMask = As<SIMD::Int>(routine.getVariable(it->second.Id)[it->second.FirstComponent]);
233 
234 		for(unsigned int q : samples)
235 		{
236 			cMask[q] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1 << q), SIMD::Int(0)));
237 		}
238 	}
239 
240 	it = spirvShader->outputBuiltins.find(spv::BuiltInFragDepth);
241 	if(it != spirvShader->outputBuiltins.end())
242 	{
243 		for(unsigned int q : samples)
244 		{
245 			z[q] = routine.getVariable(it->second.Id)[it->second.FirstComponent];
246 		}
247 	}
248 }
249 
alphaTest(Int cMask[4],const SampleSet & samples)250 Bool PixelProgram::alphaTest(Int cMask[4], const SampleSet &samples)
251 {
252 	if(!state.alphaToCoverage)
253 	{
254 		return true;
255 	}
256 
257 	alphaToCoverage(cMask, c[0].w, samples);
258 
259 	Int pass = 0;
260 	for(unsigned int q : samples)
261 	{
262 		pass = pass | cMask[q];
263 	}
264 
265 	return pass != 0x0;
266 }
267 
blendColor(Pointer<Byte> cBuffer[4],Int & x,Int sMask[4],Int zMask[4],Int cMask[4],const SampleSet & samples)268 void PixelProgram::blendColor(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4], const SampleSet &samples)
269 {
270 	for(int index = 0; index < MAX_COLOR_BUFFERS; index++)
271 	{
272 		if(!state.colorWriteActive(index))
273 		{
274 			continue;
275 		}
276 
277 		auto format = state.colorFormat[index];
278 		switch(format)
279 		{
280 		case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
281 		case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
282 		case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
283 		case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
284 		case VK_FORMAT_B5G6R5_UNORM_PACK16:
285 		case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
286 		case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
287 		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
288 		case VK_FORMAT_R5G6B5_UNORM_PACK16:
289 		case VK_FORMAT_B8G8R8A8_UNORM:
290 		case VK_FORMAT_B8G8R8A8_SRGB:
291 		case VK_FORMAT_R8G8B8A8_UNORM:
292 		case VK_FORMAT_R8G8B8A8_SRGB:
293 		case VK_FORMAT_R8G8_UNORM:
294 		case VK_FORMAT_R8_UNORM:
295 		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
296 		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
297 		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
298 		case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
299 			for(unsigned int q : samples)
300 			{
301 				Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
302 
303 				Vector4f colorf = alphaBlend(index, buffer, c[index], x);
304 
305 				Vector4s color;
306 				color.x = convertFixed16(colorf.x, true);
307 				color.y = convertFixed16(colorf.y, true);
308 				color.z = convertFixed16(colorf.z, true);
309 				color.w = convertFixed16(colorf.w, true);
310 				writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
311 			}
312 			break;
313 		case VK_FORMAT_R16_SFLOAT:
314 		case VK_FORMAT_R16G16_SFLOAT:
315 		case VK_FORMAT_R16G16B16A16_SFLOAT:
316 		case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
317 		case VK_FORMAT_R32_SFLOAT:
318 		case VK_FORMAT_R32G32_SFLOAT:
319 		case VK_FORMAT_R32G32B32A32_SFLOAT:
320 		case VK_FORMAT_R32_SINT:
321 		case VK_FORMAT_R32G32_SINT:
322 		case VK_FORMAT_R32G32B32A32_SINT:
323 		case VK_FORMAT_R32_UINT:
324 		case VK_FORMAT_R32G32_UINT:
325 		case VK_FORMAT_R32G32B32A32_UINT:
326 		case VK_FORMAT_R16_UNORM:
327 		case VK_FORMAT_R16G16_UNORM:
328 		case VK_FORMAT_R16G16B16A16_UNORM:
329 		case VK_FORMAT_R16_SINT:
330 		case VK_FORMAT_R16G16_SINT:
331 		case VK_FORMAT_R16G16B16A16_SINT:
332 		case VK_FORMAT_R16_UINT:
333 		case VK_FORMAT_R16G16_UINT:
334 		case VK_FORMAT_R16G16B16A16_UINT:
335 		case VK_FORMAT_R8_SINT:
336 		case VK_FORMAT_R8G8_SINT:
337 		case VK_FORMAT_R8G8B8A8_SINT:
338 		case VK_FORMAT_R8_UINT:
339 		case VK_FORMAT_R8G8_UINT:
340 		case VK_FORMAT_R8G8B8A8_UINT:
341 		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
342 		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
343 		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
344 		case VK_FORMAT_A2R10G10B10_UINT_PACK32:
345 			for(unsigned int q : samples)
346 			{
347 				Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
348 
349 				Vector4f color = alphaBlend(index, buffer, c[index], x);
350 				writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
351 			}
352 			break;
353 		default:
354 			UNSUPPORTED("VkFormat: %d", int(format));
355 		}
356 	}
357 }
358 
clampColor(Vector4f color[MAX_COLOR_BUFFERS])359 void PixelProgram::clampColor(Vector4f color[MAX_COLOR_BUFFERS])
360 {
361 	// "If the color attachment is fixed-point, the components of the source and destination values and blend factors
362 	//  are each clamped to [0,1] or [-1,1] respectively for an unsigned normalized or signed normalized color attachment
363 	//  prior to evaluating the blend operations. If the color attachment is floating-point, no clamping occurs."
364 
365 	for(int index = 0; index < MAX_COLOR_BUFFERS; index++)
366 	{
367 		if(!state.colorWriteActive(index) && !(index == 0 && state.alphaToCoverage))
368 		{
369 			continue;
370 		}
371 
372 		switch(state.colorFormat[index])
373 		{
374 		case VK_FORMAT_UNDEFINED:
375 			break;
376 		case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
377 		case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
378 		case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
379 		case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
380 		case VK_FORMAT_B5G6R5_UNORM_PACK16:
381 		case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
382 		case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
383 		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
384 		case VK_FORMAT_R5G6B5_UNORM_PACK16:
385 		case VK_FORMAT_B8G8R8A8_UNORM:
386 		case VK_FORMAT_B8G8R8A8_SRGB:
387 		case VK_FORMAT_R8G8B8A8_UNORM:
388 		case VK_FORMAT_R8G8B8A8_SRGB:
389 		case VK_FORMAT_R8G8_UNORM:
390 		case VK_FORMAT_R8_UNORM:
391 		case VK_FORMAT_R16_UNORM:
392 		case VK_FORMAT_R16G16_UNORM:
393 		case VK_FORMAT_R16G16B16A16_UNORM:
394 		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
395 		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
396 		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
397 		case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
398 			color[index].x = Min(Max(color[index].x, Float4(0.0f)), Float4(1.0f));
399 			color[index].y = Min(Max(color[index].y, Float4(0.0f)), Float4(1.0f));
400 			color[index].z = Min(Max(color[index].z, Float4(0.0f)), Float4(1.0f));
401 			color[index].w = Min(Max(color[index].w, Float4(0.0f)), Float4(1.0f));
402 			break;
403 		case VK_FORMAT_R32_SFLOAT:
404 		case VK_FORMAT_R32G32_SFLOAT:
405 		case VK_FORMAT_R32G32B32A32_SFLOAT:
406 		case VK_FORMAT_R32_SINT:
407 		case VK_FORMAT_R32G32_SINT:
408 		case VK_FORMAT_R32G32B32A32_SINT:
409 		case VK_FORMAT_R32_UINT:
410 		case VK_FORMAT_R32G32_UINT:
411 		case VK_FORMAT_R32G32B32A32_UINT:
412 		case VK_FORMAT_R16_SFLOAT:
413 		case VK_FORMAT_R16G16_SFLOAT:
414 		case VK_FORMAT_R16G16B16A16_SFLOAT:
415 		case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
416 		case VK_FORMAT_R16_SINT:
417 		case VK_FORMAT_R16G16_SINT:
418 		case VK_FORMAT_R16G16B16A16_SINT:
419 		case VK_FORMAT_R16_UINT:
420 		case VK_FORMAT_R16G16_UINT:
421 		case VK_FORMAT_R16G16B16A16_UINT:
422 		case VK_FORMAT_R8_SINT:
423 		case VK_FORMAT_R8G8_SINT:
424 		case VK_FORMAT_R8G8B8A8_SINT:
425 		case VK_FORMAT_R8_UINT:
426 		case VK_FORMAT_R8G8_UINT:
427 		case VK_FORMAT_R8G8B8A8_UINT:
428 		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
429 		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
430 		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
431 		case VK_FORMAT_A2R10G10B10_UINT_PACK32:
432 			break;
433 		default:
434 			UNSUPPORTED("VkFormat: %d", int(state.colorFormat[index]));
435 		}
436 	}
437 }
438 
439 }  // namespace sw
440