• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Renderer.hpp"
16 
17 #include "Clipper.hpp"
18 #include "Polygon.hpp"
19 #include "Primitive.hpp"
20 #include "Vertex.hpp"
21 #include "Pipeline/Constants.hpp"
22 #include "Pipeline/SpirvShader.hpp"
23 #include "Reactor/Reactor.hpp"
24 #include "System/Debug.hpp"
25 #include "System/Half.hpp"
26 #include "System/Math.hpp"
27 #include "System/Memory.hpp"
28 #include "System/Timer.hpp"
29 #include "Vulkan/VkConfig.h"
30 #include "Vulkan/VkDevice.hpp"
31 #include "Vulkan/VkFence.hpp"
32 #include "Vulkan/VkImageView.hpp"
33 #include "Vulkan/VkQueryPool.hpp"
34 
35 #include "marl/containers.h"
36 #include "marl/defer.h"
37 #include "marl/trace.h"
38 
39 #undef max
40 
#ifndef NDEBUG
// Primitive-count bounds that exist only in debug builds (NDEBUG not defined).
unsigned int minPrimitives = 1u;
unsigned int maxPrimitives = 1u << 21;
#endif
45 
46 namespace sw {
47 
48 template<typename T>
setBatchIndices(unsigned int batch[128][3],VkPrimitiveTopology topology,VkProvokingVertexModeEXT provokingVertexMode,T indices,unsigned int start,unsigned int triangleCount)49 inline bool setBatchIndices(unsigned int batch[128][3], VkPrimitiveTopology topology, VkProvokingVertexModeEXT provokingVertexMode, T indices, unsigned int start, unsigned int triangleCount)
50 {
51 	bool provokeFirst = (provokingVertexMode == VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT);
52 
53 	switch(topology)
54 	{
55 		case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
56 		{
57 			auto index = start;
58 			auto pointBatch = &(batch[0][0]);
59 			for(unsigned int i = 0; i < triangleCount; i++)
60 			{
61 				*pointBatch++ = indices[index++];
62 			}
63 
64 			// Repeat the last index to allow for SIMD width overrun.
65 			index--;
66 			for(unsigned int i = 0; i < 3; i++)
67 			{
68 				*pointBatch++ = indices[index];
69 			}
70 			break;
71 		}
72 		case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
73 		{
74 			auto index = 2 * start;
75 			for(unsigned int i = 0; i < triangleCount; i++)
76 			{
77 				batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
78 				batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
79 				batch[i][2] = indices[index + 1];
80 
81 				index += 2;
82 			}
83 			break;
84 		}
85 		case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
86 		{
87 			auto index = start;
88 			for(unsigned int i = 0; i < triangleCount; i++)
89 			{
90 				batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
91 				batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
92 				batch[i][2] = indices[index + 1];
93 
94 				index += 1;
95 			}
96 			break;
97 		}
98 		case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
99 		{
100 			auto index = 3 * start;
101 			for(unsigned int i = 0; i < triangleCount; i++)
102 			{
103 				batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
104 				batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
105 				batch[i][2] = indices[index + (provokeFirst ? 2 : 1)];
106 
107 				index += 3;
108 			}
109 			break;
110 		}
111 		case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
112 		{
113 			auto index = start;
114 			for(unsigned int i = 0; i < triangleCount; i++)
115 			{
116 				batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
117 				batch[i][1] = indices[index + ((start + i) & 1) + (provokeFirst ? 1 : 0)];
118 				batch[i][2] = indices[index + (~(start + i) & 1) + (provokeFirst ? 1 : 0)];
119 
120 				index += 1;
121 			}
122 			break;
123 		}
124 		case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
125 		{
126 			auto index = start + 1;
127 			for(unsigned int i = 0; i < triangleCount; i++)
128 			{
129 				batch[i][provokeFirst ? 0 : 2] = indices[index + 0];
130 				batch[i][provokeFirst ? 1 : 0] = indices[index + 1];
131 				batch[i][provokeFirst ? 2 : 1] = indices[0];
132 
133 				index += 1;
134 			}
135 			break;
136 		}
137 		default:
138 			ASSERT(false);
139 			return false;
140 	}
141 
142 	return true;
143 }
144 
DrawCall()145 DrawCall::DrawCall()
146 {
147 	data = (DrawData *)allocate(sizeof(DrawData));
148 	data->constants = &constants;
149 }
150 
~DrawCall()151 DrawCall::~DrawCall()
152 {
153 	deallocate(data);
154 }
155 
Renderer(vk::Device * device)156 Renderer::Renderer(vk::Device *device)
157     : device(device)
158 {
159 	VertexProcessor::setRoutineCacheSize(1024);
160 	PixelProcessor::setRoutineCacheSize(1024);
161 	SetupProcessor::setRoutineCacheSize(1024);
162 }
163 
~Renderer()164 Renderer::~Renderer()
165 {
166 	drawTickets.take().wait();
167 }
168 
// Renderer objects must honor the alignment given in the class declaration, so their
// storage is requested with alignof(Renderer) from the Vulkan allocator.
void *Renderer::operator new(size_t size)
{
	// This operator can't be called from a derived class.
	ASSERT(size == sizeof(Renderer));

	return vk::allocate(sizeof(Renderer),
	                    alignof(Renderer),
	                    vk::DEVICE_MEMORY,
	                    VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}
175 
operator delete(void * mem)176 void Renderer::operator delete(void *mem)
177 {
178 	vk::deallocate(mem, vk::DEVICE_MEMORY);
179 }
180 
draw(const sw::Context * context,VkIndexType indexType,unsigned int count,int baseVertex,TaskEvents * events,int instanceID,int viewID,void * indexBuffer,const VkExtent3D & framebufferExtent,PushConstantStorage const & pushConstants,bool update)181 void Renderer::draw(const sw::Context *context, VkIndexType indexType, unsigned int count, int baseVertex,
182                     TaskEvents *events, int instanceID, int viewID, void *indexBuffer, const VkExtent3D &framebufferExtent,
183                     PushConstantStorage const &pushConstants, bool update)
184 {
185 	if(count == 0) { return; }
186 
187 	auto id = nextDrawID++;
188 	MARL_SCOPED_EVENT("draw %d", id);
189 
190 #ifndef NDEBUG
191 	{
192 		unsigned int minPrimitives = 1;
193 		unsigned int maxPrimitives = 1 << 21;
194 		if(count < minPrimitives || count > maxPrimitives)
195 		{
196 			return;
197 		}
198 	}
199 #endif
200 
201 	int ms = context->sampleCount;
202 
203 	if(!context->multiSampleMask)
204 	{
205 		return;
206 	}
207 
208 	marl::Pool<sw::DrawCall>::Loan draw;
209 	{
210 		MARL_SCOPED_EVENT("drawCallPool.borrow()");
211 		draw = drawCallPool.borrow();
212 	}
213 	draw->id = id;
214 
215 	if(update)
216 	{
217 		MARL_SCOPED_EVENT("update");
218 		vertexState = VertexProcessor::update(context);
219 		setupState = SetupProcessor::update(context);
220 		pixelState = PixelProcessor::update(context);
221 
222 		vertexRoutine = VertexProcessor::routine(vertexState, context->pipelineLayout, context->vertexShader, context->descriptorSets);
223 		setupRoutine = SetupProcessor::routine(setupState);
224 		pixelRoutine = PixelProcessor::routine(pixelState, context->pipelineLayout, context->pixelShader, context->descriptorSets);
225 	}
226 
227 	DrawCall::SetupFunction setupPrimitives = nullptr;
228 	unsigned int numPrimitivesPerBatch = MaxBatchSize / ms;
229 
230 	if(context->isDrawTriangle(false))
231 	{
232 		switch(context->polygonMode)
233 		{
234 			case VK_POLYGON_MODE_FILL:
235 				setupPrimitives = &DrawCall::setupSolidTriangles;
236 				break;
237 			case VK_POLYGON_MODE_LINE:
238 				setupPrimitives = &DrawCall::setupWireframeTriangles;
239 				numPrimitivesPerBatch /= 3;
240 				break;
241 			case VK_POLYGON_MODE_POINT:
242 				setupPrimitives = &DrawCall::setupPointTriangles;
243 				numPrimitivesPerBatch /= 3;
244 				break;
245 			default:
246 				UNSUPPORTED("polygon mode: %d", int(context->polygonMode));
247 				return;
248 		}
249 	}
250 	else if(context->isDrawLine(false))
251 	{
252 		setupPrimitives = &DrawCall::setupLines;
253 	}
254 	else  // Point primitive topology
255 	{
256 		setupPrimitives = &DrawCall::setupPoints;
257 	}
258 
259 	DrawData *data = draw->data;
260 	draw->occlusionQuery = occlusionQuery;
261 	draw->batchDataPool = &batchDataPool;
262 	draw->numPrimitives = count;
263 	draw->numPrimitivesPerBatch = numPrimitivesPerBatch;
264 	draw->numBatches = (count + draw->numPrimitivesPerBatch - 1) / draw->numPrimitivesPerBatch;
265 	draw->topology = context->topology;
266 	draw->provokingVertexMode = context->provokingVertexMode;
267 	draw->indexType = indexType;
268 	draw->lineRasterizationMode = context->lineRasterizationMode;
269 
270 	draw->vertexRoutine = vertexRoutine;
271 	draw->setupRoutine = setupRoutine;
272 	draw->pixelRoutine = pixelRoutine;
273 	draw->setupPrimitives = setupPrimitives;
274 	draw->setupState = setupState;
275 
276 	data->descriptorSets = context->descriptorSets;
277 	data->descriptorDynamicOffsets = context->descriptorDynamicOffsets;
278 
279 	for(int i = 0; i < MAX_INTERFACE_COMPONENTS / 4; i++)
280 	{
281 		data->input[i] = context->input[i].buffer;
282 		data->robustnessSize[i] = context->input[i].robustnessSize;
283 		data->stride[i] = context->input[i].vertexStride;
284 	}
285 
286 	data->indices = indexBuffer;
287 	data->viewID = viewID;
288 	data->instanceID = instanceID;
289 	data->baseVertex = baseVertex;
290 
291 	if(pixelState.stencilActive)
292 	{
293 		data->stencil[0].set(context->frontStencil.reference, context->frontStencil.compareMask, context->frontStencil.writeMask);
294 		data->stencil[1].set(context->backStencil.reference, context->backStencil.compareMask, context->backStencil.writeMask);
295 	}
296 
297 	data->lineWidth = context->lineWidth;
298 
299 	data->factor = factor;
300 
301 	if(pixelState.alphaToCoverage)
302 	{
303 		if(ms == 4)
304 		{
305 			data->a2c0 = float4(0.2f);
306 			data->a2c1 = float4(0.4f);
307 			data->a2c2 = float4(0.6f);
308 			data->a2c3 = float4(0.8f);
309 		}
310 		else if(ms == 2)
311 		{
312 			data->a2c0 = float4(0.25f);
313 			data->a2c1 = float4(0.75f);
314 		}
315 		else
316 			ASSERT(false);
317 	}
318 
319 	if(pixelState.occlusionEnabled)
320 	{
321 		for(int cluster = 0; cluster < MaxClusterCount; cluster++)
322 		{
323 			data->occlusion[cluster] = 0;
324 		}
325 	}
326 
327 	// Viewport
328 	{
329 		float W = 0.5f * viewport.width;
330 		float H = 0.5f * viewport.height;
331 		float X0 = viewport.x + W;
332 		float Y0 = viewport.y + H;
333 		float N = viewport.minDepth;
334 		float F = viewport.maxDepth;
335 		float Z = F - N;
336 		constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
337 
338 		if(context->isDrawTriangle(false))
339 		{
340 			N += context->depthBias;
341 		}
342 
343 		data->WxF = float4(W * subPixF);
344 		data->HxF = float4(H * subPixF);
345 		data->X0xF = float4(X0 * subPixF - subPixF / 2);
346 		data->Y0xF = float4(Y0 * subPixF - subPixF / 2);
347 		data->halfPixelX = float4(0.5f / W);
348 		data->halfPixelY = float4(0.5f / H);
349 		data->viewportHeight = abs(viewport.height);
350 		data->slopeDepthBias = context->slopeDepthBias;
351 		data->depthRange = Z;
352 		data->depthNear = N;
353 	}
354 
355 	// Target
356 	{
357 		for(int index = 0; index < RENDERTARGETS; index++)
358 		{
359 			draw->renderTarget[index] = context->renderTarget[index];
360 
361 			if(draw->renderTarget[index])
362 			{
363 				data->colorBuffer[index] = (unsigned int *)context->renderTarget[index]->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_COLOR_BIT, 0, data->viewID);
364 				data->colorPitchB[index] = context->renderTarget[index]->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
365 				data->colorSliceB[index] = context->renderTarget[index]->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
366 			}
367 		}
368 
369 		draw->depthBuffer = context->depthBuffer;
370 		draw->stencilBuffer = context->stencilBuffer;
371 
372 		if(draw->depthBuffer)
373 		{
374 			data->depthBuffer = (float *)context->depthBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_DEPTH_BIT, 0, data->viewID);
375 			data->depthPitchB = context->depthBuffer->rowPitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
376 			data->depthSliceB = context->depthBuffer->slicePitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
377 		}
378 
379 		if(draw->stencilBuffer)
380 		{
381 			data->stencilBuffer = (unsigned char *)context->stencilBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_STENCIL_BIT, 0, data->viewID);
382 			data->stencilPitchB = context->stencilBuffer->rowPitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
383 			data->stencilSliceB = context->stencilBuffer->slicePitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
384 		}
385 	}
386 
387 	// Scissor
388 	{
389 		data->scissorX0 = clamp<int>(scissor.offset.x, 0, framebufferExtent.width);
390 		data->scissorX1 = clamp<int>(scissor.offset.x + scissor.extent.width, 0, framebufferExtent.width);
391 		data->scissorY0 = clamp<int>(scissor.offset.y, 0, framebufferExtent.height);
392 		data->scissorY1 = clamp<int>(scissor.offset.y + scissor.extent.height, 0, framebufferExtent.height);
393 	}
394 
395 	// Push constants
396 	{
397 		data->pushConstants = pushConstants;
398 	}
399 
400 	draw->events = events;
401 
402 	DrawCall::run(draw, &drawTickets, clusterQueues);
403 }
404 
setup()405 void DrawCall::setup()
406 {
407 	if(occlusionQuery != nullptr)
408 	{
409 		occlusionQuery->start();
410 	}
411 
412 	if(events)
413 	{
414 		events->start();
415 	}
416 }
417 
teardown()418 void DrawCall::teardown()
419 {
420 	if(events)
421 	{
422 		events->finish();
423 		events = nullptr;
424 	}
425 
426 	if(occlusionQuery != nullptr)
427 	{
428 		for(int cluster = 0; cluster < MaxClusterCount; cluster++)
429 		{
430 			occlusionQuery->add(data->occlusion[cluster]);
431 		}
432 		occlusionQuery->finish();
433 	}
434 
435 	vertexRoutine = {};
436 	setupRoutine = {};
437 	pixelRoutine = {};
438 }
439 
run(const marl::Loan<DrawCall> & draw,marl::Ticket::Queue * tickets,marl::Ticket::Queue clusterQueues[MaxClusterCount])440 void DrawCall::run(const marl::Loan<DrawCall> &draw, marl::Ticket::Queue *tickets, marl::Ticket::Queue clusterQueues[MaxClusterCount])
441 {
442 	draw->setup();
443 
444 	auto const numPrimitives = draw->numPrimitives;
445 	auto const numPrimitivesPerBatch = draw->numPrimitivesPerBatch;
446 	auto const numBatches = draw->numBatches;
447 
448 	auto ticket = tickets->take();
449 	auto finally = marl::make_shared_finally([draw, ticket] {
450 		MARL_SCOPED_EVENT("FINISH draw %d", draw->id);
451 		draw->teardown();
452 		ticket.done();
453 	});
454 
455 	for(unsigned int batchId = 0; batchId < numBatches; batchId++)
456 	{
457 		auto batch = draw->batchDataPool->borrow();
458 		batch->id = batchId;
459 		batch->firstPrimitive = batch->id * numPrimitivesPerBatch;
460 		batch->numPrimitives = std::min(batch->firstPrimitive + numPrimitivesPerBatch, numPrimitives) - batch->firstPrimitive;
461 
462 		for(int cluster = 0; cluster < MaxClusterCount; cluster++)
463 		{
464 			batch->clusterTickets[cluster] = std::move(clusterQueues[cluster].take());
465 		}
466 
467 		marl::schedule([draw, batch, finally] {
468 			processVertices(draw.get(), batch.get());
469 
470 			if(!draw->setupState.rasterizerDiscard)
471 			{
472 				processPrimitives(draw.get(), batch.get());
473 
474 				if(batch->numVisible > 0)
475 				{
476 					processPixels(draw, batch, finally);
477 					return;
478 				}
479 			}
480 
481 			for(int cluster = 0; cluster < MaxClusterCount; cluster++)
482 			{
483 				batch->clusterTickets[cluster].done();
484 			}
485 		});
486 	}
487 }
488 
processVertices(DrawCall * draw,BatchData * batch)489 void DrawCall::processVertices(DrawCall *draw, BatchData *batch)
490 {
491 	MARL_SCOPED_EVENT("VERTEX draw %d, batch %d", draw->id, batch->id);
492 
493 	unsigned int triangleIndices[MaxBatchSize + 1][3];  // One extra for SIMD width overrun. TODO: Adjust to dynamic batch size.
494 	{
495 		MARL_SCOPED_EVENT("processPrimitiveVertices");
496 		processPrimitiveVertices(
497 		    triangleIndices,
498 		    draw->data->indices,
499 		    draw->indexType,
500 		    batch->firstPrimitive,
501 		    batch->numPrimitives,
502 		    draw->topology,
503 		    draw->provokingVertexMode);
504 	}
505 
506 	auto &vertexTask = batch->vertexTask;
507 	vertexTask.primitiveStart = batch->firstPrimitive;
508 	// We're only using batch compaction for points, not lines
509 	vertexTask.vertexCount = batch->numPrimitives * ((draw->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) ? 1 : 3);
510 	if(vertexTask.vertexCache.drawCall != draw->id)
511 	{
512 		vertexTask.vertexCache.clear();
513 		vertexTask.vertexCache.drawCall = draw->id;
514 	}
515 
516 	draw->vertexRoutine(&batch->triangles.front().v0, &triangleIndices[0][0], &vertexTask, draw->data);
517 }
518 
processPrimitives(DrawCall * draw,BatchData * batch)519 void DrawCall::processPrimitives(DrawCall *draw, BatchData *batch)
520 {
521 	MARL_SCOPED_EVENT("PRIMITIVES draw %d batch %d", draw->id, batch->id);
522 	auto triangles = &batch->triangles[0];
523 	auto primitives = &batch->primitives[0];
524 	batch->numVisible = draw->setupPrimitives(triangles, primitives, draw, batch->numPrimitives);
525 }
526 
processPixels(const marl::Loan<DrawCall> & draw,const marl::Loan<BatchData> & batch,const std::shared_ptr<marl::Finally> & finally)527 void DrawCall::processPixels(const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
528 {
529 	struct Data
530 	{
531 		Data(const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
532 		    : draw(draw)
533 		    , batch(batch)
534 		    , finally(finally)
535 		{}
536 		marl::Loan<DrawCall> draw;
537 		marl::Loan<BatchData> batch;
538 		std::shared_ptr<marl::Finally> finally;
539 	};
540 	auto data = std::make_shared<Data>(draw, batch, finally);
541 	for(int cluster = 0; cluster < MaxClusterCount; cluster++)
542 	{
543 		batch->clusterTickets[cluster].onCall([data, cluster] {
544 			auto &draw = data->draw;
545 			auto &batch = data->batch;
546 			MARL_SCOPED_EVENT("PIXEL draw %d, batch %d, cluster %d", draw->id, batch->id, cluster);
547 			draw->pixelRoutine(&batch->primitives.front(), batch->numVisible, cluster, MaxClusterCount, draw->data);
548 			batch->clusterTickets[cluster].done();
549 		});
550 	}
551 }
552 
synchronize()553 void Renderer::synchronize()
554 {
555 	MARL_SCOPED_EVENT("synchronize");
556 	auto ticket = drawTickets.take();
557 	ticket.wait();
558 	device->updateSamplingRoutineConstCache();
559 	ticket.done();
560 }
561 
processPrimitiveVertices(unsigned int triangleIndicesOut[MaxBatchSize+1][3],const void * primitiveIndices,VkIndexType indexType,unsigned int start,unsigned int triangleCount,VkPrimitiveTopology topology,VkProvokingVertexModeEXT provokingVertexMode)562 void DrawCall::processPrimitiveVertices(
563     unsigned int triangleIndicesOut[MaxBatchSize + 1][3],
564     const void *primitiveIndices,
565     VkIndexType indexType,
566     unsigned int start,
567     unsigned int triangleCount,
568     VkPrimitiveTopology topology,
569     VkProvokingVertexModeEXT provokingVertexMode)
570 {
571 	if(!primitiveIndices)
572 	{
573 		struct LinearIndex
574 		{
575 			unsigned int operator[](unsigned int i) { return i; }
576 		};
577 
578 		if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, LinearIndex(), start, triangleCount))
579 		{
580 			return;
581 		}
582 	}
583 	else
584 	{
585 		switch(indexType)
586 		{
587 			case VK_INDEX_TYPE_UINT16:
588 				if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint16_t *>(primitiveIndices), start, triangleCount))
589 				{
590 					return;
591 				}
592 				break;
593 			case VK_INDEX_TYPE_UINT32:
594 				if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint32_t *>(primitiveIndices), start, triangleCount))
595 				{
596 					return;
597 				}
598 				break;
599 				break;
600 			default:
601 				ASSERT(false);
602 				return;
603 		}
604 	}
605 
606 	// setBatchIndices() takes care of the point case, since it's different due to the compaction
607 	if(topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
608 	{
609 		// Repeat the last index to allow for SIMD width overrun.
610 		triangleIndicesOut[triangleCount][0] = triangleIndicesOut[triangleCount - 1][2];
611 		triangleIndicesOut[triangleCount][1] = triangleIndicesOut[triangleCount - 1][2];
612 		triangleIndicesOut[triangleCount][2] = triangleIndicesOut[triangleCount - 1][2];
613 	}
614 }
615 
setupSolidTriangles(Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)616 int DrawCall::setupSolidTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
617 {
618 	auto &state = drawCall->setupState;
619 
620 	int ms = state.multiSampleCount;
621 	const DrawData *data = drawCall->data;
622 	int visible = 0;
623 
624 	for(int i = 0; i < count; i++, triangles++)
625 	{
626 		Vertex &v0 = triangles->v0;
627 		Vertex &v1 = triangles->v1;
628 		Vertex &v2 = triangles->v2;
629 
630 		Polygon polygon(&v0.position, &v1.position, &v2.position);
631 
632 		if((v0.cullMask | v1.cullMask | v2.cullMask) == 0)
633 		{
634 			continue;
635 		}
636 
637 		if((v0.clipFlags & v1.clipFlags & v2.clipFlags) != Clipper::CLIP_FINITE)
638 		{
639 			continue;
640 		}
641 
642 		int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags;
643 		if(clipFlagsOr != Clipper::CLIP_FINITE)
644 		{
645 			if(!Clipper::Clip(polygon, clipFlagsOr, *drawCall))
646 			{
647 				continue;
648 			}
649 		}
650 
651 		if(drawCall->setupRoutine(primitives, triangles, &polygon, data))
652 		{
653 			primitives += ms;
654 			visible++;
655 		}
656 	}
657 
658 	return visible;
659 }
660 
setupWireframeTriangles(Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)661 int DrawCall::setupWireframeTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
662 {
663 	auto &state = drawCall->setupState;
664 
665 	int ms = state.multiSampleCount;
666 	int visible = 0;
667 
668 	for(int i = 0; i < count; i++)
669 	{
670 		const Vertex &v0 = triangles[i].v0;
671 		const Vertex &v1 = triangles[i].v1;
672 		const Vertex &v2 = triangles[i].v2;
673 
674 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w +
675 		          (v0.x * v2.y - v0.y * v2.x) * v1.w +
676 		          (v2.x * v1.y - v1.x * v2.y) * v0.w;
677 
678 		bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0);
679 		if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
680 		{
681 			if(frontFacing) continue;
682 		}
683 		if(state.cullMode & VK_CULL_MODE_BACK_BIT)
684 		{
685 			if(!frontFacing) continue;
686 		}
687 
688 		Triangle lines[3];
689 		lines[0].v0 = v0;
690 		lines[0].v1 = v1;
691 		lines[1].v0 = v1;
692 		lines[1].v1 = v2;
693 		lines[2].v0 = v2;
694 		lines[2].v1 = v0;
695 
696 		for(int i = 0; i < 3; i++)
697 		{
698 			if(setupLine(*primitives, lines[i], *drawCall))
699 			{
700 				primitives += ms;
701 				visible++;
702 			}
703 		}
704 	}
705 
706 	return visible;
707 }
708 
setupPointTriangles(Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)709 int DrawCall::setupPointTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
710 {
711 	auto &state = drawCall->setupState;
712 
713 	int ms = state.multiSampleCount;
714 	int visible = 0;
715 
716 	for(int i = 0; i < count; i++)
717 	{
718 		const Vertex &v0 = triangles[i].v0;
719 		const Vertex &v1 = triangles[i].v1;
720 		const Vertex &v2 = triangles[i].v2;
721 
722 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w +
723 		          (v0.x * v2.y - v0.y * v2.x) * v1.w +
724 		          (v2.x * v1.y - v1.x * v2.y) * v0.w;
725 
726 		bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0);
727 		if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
728 		{
729 			if(frontFacing) continue;
730 		}
731 		if(state.cullMode & VK_CULL_MODE_BACK_BIT)
732 		{
733 			if(!frontFacing) continue;
734 		}
735 
736 		Triangle points[3];
737 		points[0].v0 = v0;
738 		points[1].v0 = v1;
739 		points[2].v0 = v2;
740 
741 		for(int i = 0; i < 3; i++)
742 		{
743 			if(setupPoint(*primitives, points[i], *drawCall))
744 			{
745 				primitives += ms;
746 				visible++;
747 			}
748 		}
749 	}
750 
751 	return visible;
752 }
753 
setupLines(Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)754 int DrawCall::setupLines(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
755 {
756 	auto &state = drawCall->setupState;
757 
758 	int visible = 0;
759 	int ms = state.multiSampleCount;
760 
761 	for(int i = 0; i < count; i++)
762 	{
763 		if(setupLine(*primitives, *triangles, *drawCall))
764 		{
765 			primitives += ms;
766 			visible++;
767 		}
768 
769 		triangles++;
770 	}
771 
772 	return visible;
773 }
774 
setupPoints(Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)775 int DrawCall::setupPoints(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
776 {
777 	auto &state = drawCall->setupState;
778 
779 	int visible = 0;
780 	int ms = state.multiSampleCount;
781 
782 	for(int i = 0; i < count; i++)
783 	{
784 		if(setupPoint(*primitives, *triangles, *drawCall))
785 		{
786 			primitives += ms;
787 			visible++;
788 		}
789 
790 		triangles++;
791 	}
792 
793 	return visible;
794 }
795 
setupLine(Primitive & primitive,Triangle & triangle,const DrawCall & draw)796 bool DrawCall::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
797 {
798 	const DrawData &data = *draw.data;
799 
800 	float lineWidth = data.lineWidth;
801 
802 	Vertex &v0 = triangle.v0;
803 	Vertex &v1 = triangle.v1;
804 
805 	if((v0.cullMask | v1.cullMask) == 0)
806 	{
807 		return false;
808 	}
809 
810 	const float4 &P0 = v0.position;
811 	const float4 &P1 = v1.position;
812 
813 	if(P0.w <= 0 && P1.w <= 0)
814 	{
815 		return false;
816 	}
817 
818 	constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
819 
820 	const float W = data.WxF[0] * (1.0f / subPixF);
821 	const float H = data.HxF[0] * (1.0f / subPixF);
822 
823 	float dx = W * (P1.x / P1.w - P0.x / P0.w);
824 	float dy = H * (P1.y / P1.w - P0.y / P0.w);
825 
826 	if(dx == 0 && dy == 0)
827 	{
828 		return false;
829 	}
830 
831 	if(draw.lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT)
832 	{
833 		// Rectangle centered on the line segment
834 
835 		float4 P[4];
836 		int C[4];
837 
838 		P[0] = P0;
839 		P[1] = P1;
840 		P[2] = P1;
841 		P[3] = P0;
842 
843 		float scale = lineWidth * 0.5f / sqrt(dx * dx + dy * dy);
844 
845 		dx *= scale;
846 		dy *= scale;
847 
848 		float dx0h = dx * P0.w / H;
849 		float dy0w = dy * P0.w / W;
850 
851 		float dx1h = dx * P1.w / H;
852 		float dy1w = dy * P1.w / W;
853 
854 		P[0].x += -dy0w;
855 		P[0].y += +dx0h;
856 		C[0] = Clipper::ComputeClipFlags(P[0]);
857 
858 		P[1].x += -dy1w;
859 		P[1].y += +dx1h;
860 		C[1] = Clipper::ComputeClipFlags(P[1]);
861 
862 		P[2].x += +dy1w;
863 		P[2].y += -dx1h;
864 		C[2] = Clipper::ComputeClipFlags(P[2]);
865 
866 		P[3].x += +dy0w;
867 		P[3].y += -dx0h;
868 		C[3] = Clipper::ComputeClipFlags(P[3]);
869 
870 		if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
871 		{
872 			Polygon polygon(P, 4);
873 
874 			int clipFlagsOr = C[0] | C[1] | C[2] | C[3];
875 
876 			if(clipFlagsOr != Clipper::CLIP_FINITE)
877 			{
878 				if(!Clipper::Clip(polygon, clipFlagsOr, draw))
879 				{
880 					return false;
881 				}
882 			}
883 
884 			return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
885 		}
886 	}
887 	else if(false)  // TODO(b/80135519): Deprecate
888 	{
889 		// Connecting diamonds polygon
890 		// This shape satisfies the diamond test convention, except for the exit rule part.
891 		// Line segments with overlapping endpoints have duplicate fragments.
892 		// The ideal algorithm requires half-open line rasterization (b/80135519).
893 
894 		float4 P[8];
895 		int C[8];
896 
897 		P[0] = P0;
898 		P[1] = P0;
899 		P[2] = P0;
900 		P[3] = P0;
901 		P[4] = P1;
902 		P[5] = P1;
903 		P[6] = P1;
904 		P[7] = P1;
905 
906 		float dx0 = lineWidth * 0.5f * P0.w / W;
907 		float dy0 = lineWidth * 0.5f * P0.w / H;
908 
909 		float dx1 = lineWidth * 0.5f * P1.w / W;
910 		float dy1 = lineWidth * 0.5f * P1.w / H;
911 
912 		P[0].x += -dx0;
913 		C[0] = Clipper::ComputeClipFlags(P[0]);
914 
915 		P[1].y += +dy0;
916 		C[1] = Clipper::ComputeClipFlags(P[1]);
917 
918 		P[2].x += +dx0;
919 		C[2] = Clipper::ComputeClipFlags(P[2]);
920 
921 		P[3].y += -dy0;
922 		C[3] = Clipper::ComputeClipFlags(P[3]);
923 
924 		P[4].x += -dx1;
925 		C[4] = Clipper::ComputeClipFlags(P[4]);
926 
927 		P[5].y += +dy1;
928 		C[5] = Clipper::ComputeClipFlags(P[5]);
929 
930 		P[6].x += +dx1;
931 		C[6] = Clipper::ComputeClipFlags(P[6]);
932 
933 		P[7].y += -dy1;
934 		C[7] = Clipper::ComputeClipFlags(P[7]);
935 
936 		if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
937 		{
938 			float4 L[6];
939 
940 			if(dx > -dy)
941 			{
942 				if(dx > dy)  // Right
943 				{
944 					L[0] = P[0];
945 					L[1] = P[1];
946 					L[2] = P[5];
947 					L[3] = P[6];
948 					L[4] = P[7];
949 					L[5] = P[3];
950 				}
951 				else  // Down
952 				{
953 					L[0] = P[0];
954 					L[1] = P[4];
955 					L[2] = P[5];
956 					L[3] = P[6];
957 					L[4] = P[2];
958 					L[5] = P[3];
959 				}
960 			}
961 			else
962 			{
963 				if(dx > dy)  // Up
964 				{
965 					L[0] = P[0];
966 					L[1] = P[1];
967 					L[2] = P[2];
968 					L[3] = P[6];
969 					L[4] = P[7];
970 					L[5] = P[4];
971 				}
972 				else  // Left
973 				{
974 					L[0] = P[1];
975 					L[1] = P[2];
976 					L[2] = P[3];
977 					L[3] = P[7];
978 					L[4] = P[4];
979 					L[5] = P[5];
980 				}
981 			}
982 
983 			Polygon polygon(L, 6);
984 
985 			int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7];
986 
987 			if(clipFlagsOr != Clipper::CLIP_FINITE)
988 			{
989 				if(!Clipper::Clip(polygon, clipFlagsOr, draw))
990 				{
991 					return false;
992 				}
993 			}
994 
995 			return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
996 		}
997 	}
998 	else
999 	{
1000 		// Parallelogram approximating Bresenham line
1001 		// This algorithm does not satisfy the ideal diamond-exit rule, but does avoid the
1002 		// duplicate fragment rasterization problem and satisfies all of Vulkan's minimum
1003 		// requirements for Bresenham line segment rasterization.
1004 
1005 		float4 P[8];
1006 		P[0] = P0;
1007 		P[1] = P0;
1008 		P[2] = P0;
1009 		P[3] = P0;
1010 		P[4] = P1;
1011 		P[5] = P1;
1012 		P[6] = P1;
1013 		P[7] = P1;
1014 
1015 		float dx0 = lineWidth * 0.5f * P0.w / W;
1016 		float dy0 = lineWidth * 0.5f * P0.w / H;
1017 
1018 		float dx1 = lineWidth * 0.5f * P1.w / W;
1019 		float dy1 = lineWidth * 0.5f * P1.w / H;
1020 
1021 		P[0].x += -dx0;
1022 		P[1].y += +dy0;
1023 		P[2].x += +dx0;
1024 		P[3].y += -dy0;
1025 		P[4].x += -dx1;
1026 		P[5].y += +dy1;
1027 		P[6].x += +dx1;
1028 		P[7].y += -dy1;
1029 
1030 		float4 L[4];
1031 
1032 		if(dx > -dy)
1033 		{
1034 			if(dx > dy)  // Right
1035 			{
1036 				L[0] = P[1];
1037 				L[1] = P[5];
1038 				L[2] = P[7];
1039 				L[3] = P[3];
1040 			}
1041 			else  // Down
1042 			{
1043 				L[0] = P[0];
1044 				L[1] = P[4];
1045 				L[2] = P[6];
1046 				L[3] = P[2];
1047 			}
1048 		}
1049 		else
1050 		{
1051 			if(dx > dy)  // Up
1052 			{
1053 				L[0] = P[0];
1054 				L[1] = P[2];
1055 				L[2] = P[6];
1056 				L[3] = P[4];
1057 			}
1058 			else  // Left
1059 			{
1060 				L[0] = P[1];
1061 				L[1] = P[3];
1062 				L[2] = P[7];
1063 				L[3] = P[5];
1064 			}
1065 		}
1066 
1067 		int C0 = Clipper::ComputeClipFlags(L[0]);
1068 		int C1 = Clipper::ComputeClipFlags(L[1]);
1069 		int C2 = Clipper::ComputeClipFlags(L[2]);
1070 		int C3 = Clipper::ComputeClipFlags(L[3]);
1071 
1072 		if((C0 & C1 & C2 & C3) == Clipper::CLIP_FINITE)
1073 		{
1074 			Polygon polygon(L, 4);
1075 
1076 			int clipFlagsOr = C0 | C1 | C2 | C3;
1077 
1078 			if(clipFlagsOr != Clipper::CLIP_FINITE)
1079 			{
1080 				if(!Clipper::Clip(polygon, clipFlagsOr, draw))
1081 				{
1082 					return false;
1083 				}
1084 			}
1085 
1086 			return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
1087 		}
1088 	}
1089 
1090 	return false;
1091 }
1092 
setupPoint(Primitive & primitive,Triangle & triangle,const DrawCall & draw)1093 bool DrawCall::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1094 {
1095 	const DrawData &data = *draw.data;
1096 
1097 	Vertex &v = triangle.v0;
1098 
1099 	if(v.cullMask == 0)
1100 	{
1101 		return false;
1102 	}
1103 
1104 	float pSize = v.pointSize;
1105 
1106 	pSize = clamp(pSize, 1.0f, static_cast<float>(vk::MAX_POINT_SIZE));
1107 
1108 	float4 P[4];
1109 	int C[4];
1110 
1111 	P[0] = v.position;
1112 	P[1] = v.position;
1113 	P[2] = v.position;
1114 	P[3] = v.position;
1115 
1116 	const float X = pSize * P[0].w * data.halfPixelX[0];
1117 	const float Y = pSize * P[0].w * data.halfPixelY[0];
1118 
1119 	P[0].x -= X;
1120 	P[0].y += Y;
1121 	C[0] = Clipper::ComputeClipFlags(P[0]);
1122 
1123 	P[1].x += X;
1124 	P[1].y += Y;
1125 	C[1] = Clipper::ComputeClipFlags(P[1]);
1126 
1127 	P[2].x += X;
1128 	P[2].y -= Y;
1129 	C[2] = Clipper::ComputeClipFlags(P[2]);
1130 
1131 	P[3].x -= X;
1132 	P[3].y -= Y;
1133 	C[3] = Clipper::ComputeClipFlags(P[3]);
1134 
1135 	Polygon polygon(P, 4);
1136 
1137 	if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1138 	{
1139 		int clipFlagsOr = C[0] | C[1] | C[2] | C[3];
1140 
1141 		if(clipFlagsOr != Clipper::CLIP_FINITE)
1142 		{
1143 			if(!Clipper::Clip(polygon, clipFlagsOr, draw))
1144 			{
1145 				return false;
1146 			}
1147 		}
1148 
1149 		triangle.v1 = triangle.v0;
1150 		triangle.v2 = triangle.v0;
1151 
1152 		constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
1153 
1154 		triangle.v1.projected.x += iround(subPixF * 0.5f * pSize);
1155 		triangle.v2.projected.y -= iround(subPixF * 0.5f * pSize) * (data.HxF[0] > 0.0f ? 1 : -1);  // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1156 		return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
1157 	}
1158 
1159 	return false;
1160 }
1161 
addQuery(vk::Query * query)1162 void Renderer::addQuery(vk::Query *query)
1163 {
1164 	ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
1165 	ASSERT(!occlusionQuery);
1166 
1167 	occlusionQuery = query;
1168 }
1169 
removeQuery(vk::Query * query)1170 void Renderer::removeQuery(vk::Query *query)
1171 {
1172 	ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
1173 	ASSERT(occlusionQuery == query);
1174 
1175 	occlusionQuery = nullptr;
1176 }
1177 
1178 // TODO(b/137740918): Optimize instancing to use a single draw call.
advanceInstanceAttributes(Stream * inputs)1179 void Renderer::advanceInstanceAttributes(Stream *inputs)
1180 {
1181 	for(uint32_t i = 0; i < vk::MAX_VERTEX_INPUT_BINDINGS; i++)
1182 	{
1183 		auto &attrib = inputs[i];
1184 		if((attrib.format != VK_FORMAT_UNDEFINED) && attrib.instanceStride && (attrib.instanceStride < attrib.robustnessSize))
1185 		{
1186 			// Under the casts: attrib.buffer += attrib.instanceStride
1187 			attrib.buffer = (void const *)((uintptr_t)attrib.buffer + attrib.instanceStride);
1188 			attrib.robustnessSize -= attrib.instanceStride;
1189 		}
1190 	}
1191 }
1192 
setViewport(const VkViewport & viewport)1193 void Renderer::setViewport(const VkViewport &viewport)
1194 {
1195 	this->viewport = viewport;
1196 }
1197 
setScissor(const VkRect2D & scissor)1198 void Renderer::setScissor(const VkRect2D &scissor)
1199 {
1200 	this->scissor = scissor;
1201 }
1202 
1203 }  // namespace sw
1204