• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
15 #include "PixelProgram.hpp"
16 
17 #include "Constants.hpp"
18 #include "SamplerCore.hpp"
19 #include "Device/Primitive.hpp"
20 #include "Device/Renderer.hpp"
21 #include "Vulkan/VkDevice.hpp"
22 
23 namespace sw {
24 
PixelProgram(const PixelProcessor::State & state,const vk::PipelineLayout * pipelineLayout,const SpirvShader * spirvShader,const vk::DescriptorSet::Bindings & descriptorSets)25 PixelProgram::PixelProgram(
26     const PixelProcessor::State &state,
27     const vk::PipelineLayout *pipelineLayout,
28     const SpirvShader *spirvShader,
29     const vk::DescriptorSet::Bindings &descriptorSets)
30     : PixelRoutine(state, pipelineLayout, spirvShader, descriptorSets)
31 {
32 }
33 
34 // Union all cMask and return it as Booleans
maskAny(Int cMask[4],const SampleSet & samples)35 SIMD::Int PixelProgram::maskAny(Int cMask[4], const SampleSet &samples)
36 {
37 	// See if at least 1 sample is used
38 	Int maskUnion = 0;
39 	for(unsigned int q : samples)
40 	{
41 		maskUnion |= cMask[q];
42 	}
43 
44 	// Convert to Booleans
45 	SIMD::Int laneBits = SIMD::Int([](int i) { return 1 << i; });  // 1, 2, 4, 8, ...
46 	SIMD::Int mask(maskUnion);
47 	mask = CmpNEQ(mask & laneBits, 0);
48 	return mask;
49 }
50 
51 // Union all cMask/sMask/zMask and return it as Booleans
maskAny(Int cMask[4],Int sMask[4],Int zMask[4],const SampleSet & samples)52 SIMD::Int PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples)
53 {
54 	// See if at least 1 sample is used
55 	Int maskUnion = 0;
56 	for(unsigned int q : samples)
57 	{
58 		maskUnion |= (cMask[q] & sMask[q] & zMask[q]);
59 	}
60 
61 	// Convert to Booleans
62 	SIMD::Int laneBits = SIMD::Int([](int i) { return 1 << i; });  // 1, 2, 4, 8, ...
63 	SIMD::Int mask(maskUnion);
64 	mask = CmpNEQ(mask & laneBits, 0);
65 	return mask;
66 }
67 
setBuiltins(Int & x,Int & y,SIMD::Float (& z)[4],SIMD::Float & w,Int cMask[4],const SampleSet & samples)68 void PixelProgram::setBuiltins(Int &x, Int &y, SIMD::Float (&z)[4], SIMD::Float &w, Int cMask[4], const SampleSet &samples)
69 {
70 	routine.setImmutableInputBuiltins(spirvShader);
71 
72 	// TODO(b/146486064): Consider only assigning these to the SpirvRoutine iff
73 	// they are ever going to be read.
74 	float x0 = 0.5f;
75 	float y0 = 0.5f;
76 	float x1 = 1.5f;
77 	float y1 = 1.5f;
78 
79 	// "When Sample Shading is enabled, the x and y components of FragCoord reflect the
80 	//  location of one of the samples corresponding to the shader invocation. Otherwise,
81 	//  the x and y components of FragCoord reflect the location of the center of the fragment."
82 	if(state.sampleShadingEnabled && state.multiSampleCount > 1)
83 	{
84 		x0 = VkSampleLocations4[samples[0]][0];
85 		y0 = VkSampleLocations4[samples[0]][1];
86 		x1 = 1.0f + x0;
87 		y1 = 1.0f + y0;
88 	}
89 
90 	routine.fragCoord[0] = SIMD::Float(Float(x)) + SIMD::Float(x0, x1, x0, x1);
91 	routine.fragCoord[1] = SIMD::Float(Float(y)) + SIMD::Float(y0, y0, y1, y1);
92 	routine.fragCoord[2] = z[0];  // sample 0
93 	routine.fragCoord[3] = w;
94 
95 	routine.invocationsPerSubgroup = SIMD::Width;
96 	routine.helperInvocation = ~maskAny(cMask, samples);
97 	routine.windowSpacePosition[0] = SIMD::Int(x) + SIMD::Int(0, 1, 0, 1);
98 	routine.windowSpacePosition[1] = SIMD::Int(y) + SIMD::Int(0, 0, 1, 1);
99 	routine.layer = *Pointer<Int>(data + OFFSET(DrawData, layer));
100 
101 	// PointCoord formula reference: https://www.khronos.org/registry/vulkan/specs/1.2/html/vkspec.html#primsrast-points-basic
102 	// Note we don't add a 0.5 offset to x and y here (like for fragCoord) because pointCoordX/Y have 0.5 subtracted as part of the viewport transform.
103 	SIMD::Float pointSizeInv = SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, pointSizeInv)));
104 	routine.pointCoord[0] = SIMD::Float(0.5f) + pointSizeInv * (((SIMD::Float(Float(x)) + SIMD::Float(0.0f, 1.0f, 0.0f, 1.0f)) - SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, x0)))));
105 	routine.pointCoord[1] = SIMD::Float(0.5f) + pointSizeInv * (((SIMD::Float(Float(y)) + SIMD::Float(0.0f, 0.0f, 1.0f, 1.0f)) - SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, y0)))));
106 
107 	routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const Spirv::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
108 		assert(builtin.SizeInComponents == 1);
109 		value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(routine.layer));
110 	});
111 
112 	routine.setInputBuiltin(spirvShader, spv::BuiltInFragCoord, [&](const Spirv::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
113 		assert(builtin.SizeInComponents == 4);
114 		value[builtin.FirstComponent + 0] = routine.fragCoord[0];
115 		value[builtin.FirstComponent + 1] = routine.fragCoord[1];
116 		value[builtin.FirstComponent + 2] = routine.fragCoord[2];
117 		value[builtin.FirstComponent + 3] = routine.fragCoord[3];
118 	});
119 
120 	routine.setInputBuiltin(spirvShader, spv::BuiltInPointCoord, [&](const Spirv::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
121 		assert(builtin.SizeInComponents == 2);
122 		value[builtin.FirstComponent + 0] = routine.pointCoord[0];
123 		value[builtin.FirstComponent + 1] = routine.pointCoord[1];
124 	});
125 
126 	routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const Spirv::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
127 		assert(builtin.SizeInComponents == 1);
128 		value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width));
129 	});
130 
131 	routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const Spirv::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
132 		assert(builtin.SizeInComponents == 1);
133 		value[builtin.FirstComponent] = As<SIMD::Float>(routine.helperInvocation);
134 	});
135 }
136 
executeShader(Int cMask[4],Int sMask[4],Int zMask[4],const SampleSet & samples)137 void PixelProgram::executeShader(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples)
138 {
139 	routine.device = device;
140 	routine.descriptorSets = data + OFFSET(DrawData, descriptorSets);
141 	routine.descriptorDynamicOffsets = data + OFFSET(DrawData, descriptorDynamicOffsets);
142 	routine.pushConstants = data + OFFSET(DrawData, pushConstants);
143 	routine.constants = device + OFFSET(vk::Device, constants);
144 
145 	auto it = spirvShader->inputBuiltins.find(spv::BuiltInFrontFacing);
146 	if(it != spirvShader->inputBuiltins.end())
147 	{
148 		ASSERT(it->second.SizeInComponents == 1);
149 		auto frontFacing = SIMD::Int(*Pointer<Int>(primitive + OFFSET(Primitive, clockwiseMask)));
150 		routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<SIMD::Float>(frontFacing);
151 	}
152 
153 	it = spirvShader->inputBuiltins.find(spv::BuiltInSampleMask);
154 	if(it != spirvShader->inputBuiltins.end())
155 	{
156 		ASSERT(SIMD::Width == 4);
157 		SIMD::Int laneBits = SIMD::Int(1, 2, 4, 8);
158 
159 		SIMD::Int inputSampleMask = 0;
160 		for(unsigned int q : samples)
161 		{
162 			inputSampleMask |= SIMD::Int(1 << q) & CmpNEQ(SIMD::Int(cMask[q]) & laneBits, 0);
163 		}
164 
165 		routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<SIMD::Float>(inputSampleMask);
166 		// Sample mask input is an array, as the spec contemplates MSAA levels higher than 32.
167 		// Fill any non-zero indices with 0.
168 		for(auto i = 1u; i < it->second.SizeInComponents; i++)
169 		{
170 			routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = 0;
171 		}
172 	}
173 
174 	it = spirvShader->inputBuiltins.find(spv::BuiltInSampleId);
175 	if(it != spirvShader->inputBuiltins.end())
176 	{
177 		ASSERT(samples.size() == 1);
178 		int sampleId = samples[0];
179 		routine.getVariable(it->second.Id)[it->second.FirstComponent] =
180 		    As<SIMD::Float>(SIMD::Int(sampleId));
181 	}
182 
183 	it = spirvShader->inputBuiltins.find(spv::BuiltInSamplePosition);
184 	if(it != spirvShader->inputBuiltins.end())
185 	{
186 		ASSERT(samples.size() == 1);
187 		int sampleId = samples[0];
188 		routine.getVariable(it->second.Id)[it->second.FirstComponent + 0] =
189 		    SIMD::Float((state.multiSampleCount > 1) ? VkSampleLocations4[sampleId][0] : 0.5f);
190 		routine.getVariable(it->second.Id)[it->second.FirstComponent + 1] =
191 		    SIMD::Float((state.multiSampleCount > 1) ? VkSampleLocations4[sampleId][1] : 0.5f);
192 	}
193 
194 	// Note: all lanes initially active to facilitate derivatives etc. Actual coverage is
195 	// handled separately, through the cMask.
196 	SIMD::Int activeLaneMask = 0xFFFFFFFF;
197 	SIMD::Int storesAndAtomicsMask = maskAny(cMask, sMask, zMask, samples);
198 	routine.discardMask = 0;
199 
200 	spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets, state.multiSampleCount);
201 	spirvShader->emitEpilog(&routine);
202 
203 	for(int i = 0; i < MAX_COLOR_BUFFERS; i++)
204 	{
205 		c[i].x = routine.outputs[i * 4 + 0];
206 		c[i].y = routine.outputs[i * 4 + 1];
207 		c[i].z = routine.outputs[i * 4 + 2];
208 		c[i].w = routine.outputs[i * 4 + 3];
209 	}
210 
211 	clampColor(c);
212 
213 	if(spirvShader->getAnalysis().ContainsDiscard)
214 	{
215 		for(unsigned int q : samples)
216 		{
217 			cMask[q] &= ~routine.discardMask;
218 		}
219 	}
220 
221 	it = spirvShader->outputBuiltins.find(spv::BuiltInSampleMask);
222 	if(it != spirvShader->outputBuiltins.end())
223 	{
224 		auto outputSampleMask = As<SIMD::Int>(routine.getVariable(it->second.Id)[it->second.FirstComponent]);
225 
226 		for(unsigned int q : samples)
227 		{
228 			cMask[q] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1 << q), SIMD::Int(0)));
229 		}
230 	}
231 
232 	it = spirvShader->outputBuiltins.find(spv::BuiltInFragDepth);
233 	if(it != spirvShader->outputBuiltins.end())
234 	{
235 		for(unsigned int q : samples)
236 		{
237 			z[q] = routine.getVariable(it->second.Id)[it->second.FirstComponent];
238 		}
239 	}
240 }
241 
alphaTest(Int cMask[4],const SampleSet & samples)242 Bool PixelProgram::alphaTest(Int cMask[4], const SampleSet &samples)
243 {
244 	if(!state.alphaToCoverage)
245 	{
246 		return true;
247 	}
248 
249 	alphaToCoverage(cMask, c[0].w, samples);
250 
251 	Int pass = 0;
252 	for(unsigned int q : samples)
253 	{
254 		pass = pass | cMask[q];
255 	}
256 
257 	return pass != 0x0;
258 }
259 
blendColor(Pointer<Byte> cBuffer[4],Int & x,Int sMask[4],Int zMask[4],Int cMask[4],const SampleSet & samples)260 void PixelProgram::blendColor(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4], const SampleSet &samples)
261 {
262 	for(int index = 0; index < MAX_COLOR_BUFFERS; index++)
263 	{
264 		if(!state.colorWriteActive(index))
265 		{
266 			continue;
267 		}
268 
269 		for(unsigned int q : samples)
270 		{
271 			Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
272 
273 			SIMD::Float4 C = alphaBlend(index, buffer, c[index], x);
274 			ASSERT(SIMD::Width == 4);
275 			Vector4f color;
276 			color.x = Extract128(C.x, 0);
277 			color.y = Extract128(C.y, 0);
278 			color.z = Extract128(C.z, 0);
279 			color.w = Extract128(C.w, 0);
280 			writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
281 		}
282 	}
283 }
284 
clampColor(SIMD::Float4 color[MAX_COLOR_BUFFERS])285 void PixelProgram::clampColor(SIMD::Float4 color[MAX_COLOR_BUFFERS])
286 {
287 	// "If the color attachment is fixed-point, the components of the source and destination values and blend factors
288 	//  are each clamped to [0,1] or [-1,1] respectively for an unsigned normalized or signed normalized color attachment
289 	//  prior to evaluating the blend operations. If the color attachment is floating-point, no clamping occurs."
290 
291 	for(int index = 0; index < MAX_COLOR_BUFFERS; index++)
292 	{
293 		if(!state.colorWriteActive(index) && !(index == 0 && state.alphaToCoverage))
294 		{
295 			continue;
296 		}
297 
298 		switch(state.colorFormat[index])
299 		{
300 		case VK_FORMAT_UNDEFINED:
301 			break;
302 		case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
303 		case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
304 		case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
305 		case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
306 		case VK_FORMAT_B5G6R5_UNORM_PACK16:
307 		case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
308 		case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
309 		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
310 		case VK_FORMAT_R5G6B5_UNORM_PACK16:
311 		case VK_FORMAT_B8G8R8A8_UNORM:
312 		case VK_FORMAT_B8G8R8A8_SRGB:
313 		case VK_FORMAT_R8G8B8A8_UNORM:
314 		case VK_FORMAT_R8G8B8A8_SRGB:
315 		case VK_FORMAT_R8G8_UNORM:
316 		case VK_FORMAT_R8_UNORM:
317 		case VK_FORMAT_R16_UNORM:
318 		case VK_FORMAT_R16G16_UNORM:
319 		case VK_FORMAT_R16G16B16A16_UNORM:
320 		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
321 		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
322 		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
323 		case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
324 			color[index].x = Min(Max(color[index].x, 0.0f), 1.0f);
325 			color[index].y = Min(Max(color[index].y, 0.0f), 1.0f);
326 			color[index].z = Min(Max(color[index].z, 0.0f), 1.0f);
327 			color[index].w = Min(Max(color[index].w, 0.0f), 1.0f);
328 			break;
329 		case VK_FORMAT_R32_SFLOAT:
330 		case VK_FORMAT_R32G32_SFLOAT:
331 		case VK_FORMAT_R32G32B32A32_SFLOAT:
332 		case VK_FORMAT_R32_SINT:
333 		case VK_FORMAT_R32G32_SINT:
334 		case VK_FORMAT_R32G32B32A32_SINT:
335 		case VK_FORMAT_R32_UINT:
336 		case VK_FORMAT_R32G32_UINT:
337 		case VK_FORMAT_R32G32B32A32_UINT:
338 		case VK_FORMAT_R16_SFLOAT:
339 		case VK_FORMAT_R16G16_SFLOAT:
340 		case VK_FORMAT_R16G16B16A16_SFLOAT:
341 		case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
342 		case VK_FORMAT_R16_SINT:
343 		case VK_FORMAT_R16G16_SINT:
344 		case VK_FORMAT_R16G16B16A16_SINT:
345 		case VK_FORMAT_R16_UINT:
346 		case VK_FORMAT_R16G16_UINT:
347 		case VK_FORMAT_R16G16B16A16_UINT:
348 		case VK_FORMAT_R8_SINT:
349 		case VK_FORMAT_R8G8_SINT:
350 		case VK_FORMAT_R8G8B8A8_SINT:
351 		case VK_FORMAT_R8_UINT:
352 		case VK_FORMAT_R8G8_UINT:
353 		case VK_FORMAT_R8G8B8A8_UINT:
354 		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
355 		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
356 		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
357 		case VK_FORMAT_A2R10G10B10_UINT_PACK32:
358 			break;
359 		default:
360 			UNSUPPORTED("VkFormat: %d", int(state.colorFormat[index]));
361 		}
362 	}
363 }
364 
365 }  // namespace sw
366