• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Renderer.hpp"
16 
17 #include "Clipper.hpp"
18 #include "Polygon.hpp"
19 #include "Primitive.hpp"
20 #include "Vertex.hpp"
21 #include "Pipeline/Constants.hpp"
22 #include "Pipeline/SpirvShader.hpp"
23 #include "Reactor/Reactor.hpp"
24 #include "System/Debug.hpp"
25 #include "System/Half.hpp"
26 #include "System/Math.hpp"
27 #include "System/Memory.hpp"
28 #include "System/Timer.hpp"
29 #include "Vulkan/VkConfig.hpp"
30 #include "Vulkan/VkDescriptorSet.hpp"
31 #include "Vulkan/VkDevice.hpp"
32 #include "Vulkan/VkFence.hpp"
33 #include "Vulkan/VkImageView.hpp"
34 #include "Vulkan/VkPipelineLayout.hpp"
35 #include "Vulkan/VkQueryPool.hpp"
36 
37 #include "marl/containers.h"
38 #include "marl/defer.h"
39 #include "marl/trace.h"
40 
41 #undef max
42 
#ifndef NDEBUG
// Debug-build-only globals; they are compiled out when NDEBUG is defined.
// NOTE(review): neither variable is referenced in the visible part of this file;
// presumably they bound the primitive range for debugging elsewhere - confirm.
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1u << 21;
#endif
47 
48 namespace sw {
49 
50 template<typename T>
setBatchIndices(unsigned int batch[128][3],VkPrimitiveTopology topology,VkProvokingVertexModeEXT provokingVertexMode,T indices,unsigned int start,unsigned int triangleCount)51 inline bool setBatchIndices(unsigned int batch[128][3], VkPrimitiveTopology topology, VkProvokingVertexModeEXT provokingVertexMode, T indices, unsigned int start, unsigned int triangleCount)
52 {
53 	bool provokeFirst = (provokingVertexMode == VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT);
54 
55 	switch(topology)
56 	{
57 	case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
58 		{
59 			auto index = start;
60 			auto pointBatch = &(batch[0][0]);
61 			for(unsigned int i = 0; i < triangleCount; i++)
62 			{
63 				*pointBatch++ = indices[index++];
64 			}
65 
66 			// Repeat the last index to allow for SIMD width overrun.
67 			index--;
68 			for(unsigned int i = 0; i < 3; i++)
69 			{
70 				*pointBatch++ = indices[index];
71 			}
72 		}
73 		break;
74 	case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
75 		{
76 			auto index = 2 * start;
77 			for(unsigned int i = 0; i < triangleCount; i++)
78 			{
79 				batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
80 				batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
81 				batch[i][2] = indices[index + 1];
82 
83 				index += 2;
84 			}
85 		}
86 		break;
87 	case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
88 		{
89 			auto index = start;
90 			for(unsigned int i = 0; i < triangleCount; i++)
91 			{
92 				batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
93 				batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
94 				batch[i][2] = indices[index + 1];
95 
96 				index += 1;
97 			}
98 		}
99 		break;
100 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
101 		{
102 			auto index = 3 * start;
103 			for(unsigned int i = 0; i < triangleCount; i++)
104 			{
105 				batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
106 				batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
107 				batch[i][2] = indices[index + (provokeFirst ? 2 : 1)];
108 
109 				index += 3;
110 			}
111 		}
112 		break;
113 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
114 		{
115 			auto index = start;
116 			for(unsigned int i = 0; i < triangleCount; i++)
117 			{
118 				batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
119 				batch[i][1] = indices[index + ((start + i) & 1) + (provokeFirst ? 1 : 0)];
120 				batch[i][2] = indices[index + (~(start + i) & 1) + (provokeFirst ? 1 : 0)];
121 
122 				index += 1;
123 			}
124 		}
125 		break;
126 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
127 		{
128 			auto index = start + 1;
129 			for(unsigned int i = 0; i < triangleCount; i++)
130 			{
131 				batch[i][provokeFirst ? 0 : 2] = indices[index + 0];
132 				batch[i][provokeFirst ? 1 : 0] = indices[index + 1];
133 				batch[i][provokeFirst ? 2 : 1] = indices[0];
134 
135 				index += 1;
136 			}
137 		}
138 		break;
139 	default:
140 		ASSERT(false);
141 		return false;
142 	}
143 
144 	return true;
145 }
146 
DrawCall()147 DrawCall::DrawCall()
148 {
149 	// TODO(b/140991626): Use allocateUninitialized() instead of allocateZeroOrPoison() to improve startup peformance.
150 	data = (DrawData *)sw::allocateZeroOrPoison(sizeof(DrawData));
151 }
152 
~DrawCall()153 DrawCall::~DrawCall()
154 {
155 	sw::freeMemory(data);
156 }
157 
Renderer(vk::Device * device)158 Renderer::Renderer(vk::Device *device)
159     : device(device)
160 {
161 	vertexProcessor.setRoutineCacheSize(1024);
162 	pixelProcessor.setRoutineCacheSize(1024);
163 	setupProcessor.setRoutineCacheSize(1024);
164 }
165 
~Renderer()166 Renderer::~Renderer()
167 {
168 	drawTickets.take().wait();
169 }
170 
171 // Renderer objects have to be mem aligned to the alignment provided in the class declaration
operator new(size_t size)172 void *Renderer::operator new(size_t size)
173 {
174 	ASSERT(size == sizeof(Renderer));  // This operator can't be called from a derived class
175 	return vk::allocateHostMemory(sizeof(Renderer), alignof(Renderer), vk::NULL_ALLOCATION_CALLBACKS, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
176 }
177 
operator delete(void * mem)178 void Renderer::operator delete(void *mem)
179 {
180 	vk::freeHostMemory(mem, vk::NULL_ALLOCATION_CALLBACKS);
181 }
182 
draw(const vk::GraphicsPipeline * pipeline,const vk::DynamicState & dynamicState,unsigned int count,int baseVertex,CountedEvent * events,int instanceID,int layer,void * indexBuffer,const VkRect2D & renderArea,vk::Pipeline::PushConstantStorage const & pushConstants,bool update)183 void Renderer::draw(const vk::GraphicsPipeline *pipeline, const vk::DynamicState &dynamicState, unsigned int count, int baseVertex,
184                     CountedEvent *events, int instanceID, int layer, void *indexBuffer, const VkRect2D &renderArea,
185                     vk::Pipeline::PushConstantStorage const &pushConstants, bool update)
186 {
187 	if(count == 0) { return; }
188 
189 	auto id = nextDrawID++;
190 	MARL_SCOPED_EVENT("draw %d", id);
191 
192 	marl::Pool<sw::DrawCall>::Loan draw;
193 	{
194 		MARL_SCOPED_EVENT("drawCallPool.borrow()");
195 		draw = drawCallPool.borrow();
196 	}
197 	draw->id = id;
198 
199 	const vk::GraphicsState &pipelineState = pipeline->getState(dynamicState);
200 	pixelProcessor.setBlendConstant(pipelineState.getBlendConstants());
201 
202 	const vk::Inputs &inputs = pipeline->getInputs();
203 
204 	if(update)
205 	{
206 		MARL_SCOPED_EVENT("update");
207 
208 		const sw::SpirvShader *fragmentShader = pipeline->getShader(VK_SHADER_STAGE_FRAGMENT_BIT).get();
209 		const sw::SpirvShader *vertexShader = pipeline->getShader(VK_SHADER_STAGE_VERTEX_BIT).get();
210 
211 		const vk::Attachments attachments = pipeline->getAttachments();
212 
213 		vertexState = vertexProcessor.update(pipelineState, vertexShader, inputs);
214 		setupState = setupProcessor.update(pipelineState, fragmentShader, vertexShader, attachments);
215 		pixelState = pixelProcessor.update(pipelineState, fragmentShader, vertexShader, attachments, hasOcclusionQuery());
216 
217 		vertexRoutine = vertexProcessor.routine(vertexState, pipelineState.getPipelineLayout(), vertexShader, inputs.getDescriptorSets());
218 		setupRoutine = setupProcessor.routine(setupState);
219 		pixelRoutine = pixelProcessor.routine(pixelState, pipelineState.getPipelineLayout(), fragmentShader, inputs.getDescriptorSets());
220 	}
221 
222 	draw->containsImageWrite = pipeline->containsImageWrite();
223 
224 	DrawCall::SetupFunction setupPrimitives = nullptr;
225 	int ms = pipelineState.getSampleCount();
226 	unsigned int numPrimitivesPerBatch = MaxBatchSize / ms;
227 
228 	if(pipelineState.isDrawTriangle(false))
229 	{
230 		switch(pipelineState.getPolygonMode())
231 		{
232 		case VK_POLYGON_MODE_FILL:
233 			setupPrimitives = &DrawCall::setupSolidTriangles;
234 			break;
235 		case VK_POLYGON_MODE_LINE:
236 			setupPrimitives = &DrawCall::setupWireframeTriangles;
237 			numPrimitivesPerBatch /= 3;
238 			break;
239 		case VK_POLYGON_MODE_POINT:
240 			setupPrimitives = &DrawCall::setupPointTriangles;
241 			numPrimitivesPerBatch /= 3;
242 			break;
243 		default:
244 			UNSUPPORTED("polygon mode: %d", int(pipelineState.getPolygonMode()));
245 			return;
246 		}
247 	}
248 	else if(pipelineState.isDrawLine(false))
249 	{
250 		setupPrimitives = &DrawCall::setupLines;
251 	}
252 	else  // Point primitive topology
253 	{
254 		setupPrimitives = &DrawCall::setupPoints;
255 	}
256 
257 	DrawData *data = draw->data;
258 	draw->occlusionQuery = occlusionQuery;
259 	draw->batchDataPool = &batchDataPool;
260 	draw->numPrimitives = count;
261 	draw->numPrimitivesPerBatch = numPrimitivesPerBatch;
262 	draw->numBatches = (count + draw->numPrimitivesPerBatch - 1) / draw->numPrimitivesPerBatch;
263 	draw->topology = pipelineState.getTopology();
264 	draw->provokingVertexMode = pipelineState.getProvokingVertexMode();
265 	draw->indexType = pipeline->getIndexBuffer().getIndexType();
266 	draw->lineRasterizationMode = pipelineState.getLineRasterizationMode();
267 	draw->descriptorSetObjects = inputs.getDescriptorSetObjects();
268 	draw->pipelineLayout = pipelineState.getPipelineLayout();
269 	draw->depthClipEnable = pipelineState.getDepthClipEnable();
270 
271 	draw->vertexRoutine = vertexRoutine;
272 	draw->setupRoutine = setupRoutine;
273 	draw->pixelRoutine = pixelRoutine;
274 	draw->setupPrimitives = setupPrimitives;
275 	draw->setupState = setupState;
276 
277 	data->descriptorSets = inputs.getDescriptorSets();
278 	data->descriptorDynamicOffsets = inputs.getDescriptorDynamicOffsets();
279 
280 	for(int i = 0; i < MAX_INTERFACE_COMPONENTS / 4; i++)
281 	{
282 		const sw::Stream &stream = inputs.getStream(i);
283 		data->input[i] = stream.buffer;
284 		data->robustnessSize[i] = stream.robustnessSize;
285 		data->stride[i] = inputs.getVertexStride(i, pipelineState.hasDynamicVertexStride());
286 	}
287 
288 	data->indices = indexBuffer;
289 	data->layer = layer;
290 	data->instanceID = instanceID;
291 	data->baseVertex = baseVertex;
292 
293 	if(pixelState.stencilActive)
294 	{
295 		data->stencil[0].set(pipelineState.getFrontStencil().reference, pipelineState.getFrontStencil().compareMask, pipelineState.getFrontStencil().writeMask);
296 		data->stencil[1].set(pipelineState.getBackStencil().reference, pipelineState.getBackStencil().compareMask, pipelineState.getBackStencil().writeMask);
297 	}
298 
299 	data->lineWidth = pipelineState.getLineWidth();
300 
301 	data->factor = pixelProcessor.factor;
302 
303 	if(pixelState.alphaToCoverage)
304 	{
305 		if(ms == 4)
306 		{
307 			data->a2c0 = float4(0.2f);
308 			data->a2c1 = float4(0.4f);
309 			data->a2c2 = float4(0.6f);
310 			data->a2c3 = float4(0.8f);
311 		}
312 		else if(ms == 2)
313 		{
314 			data->a2c0 = float4(0.25f);
315 			data->a2c1 = float4(0.75f);
316 		}
317 		else if(ms == 1)
318 		{
319 			data->a2c0 = float4(0.5f);
320 		}
321 		else
322 			ASSERT(false);
323 	}
324 
325 	if(pixelState.occlusionEnabled)
326 	{
327 		for(int cluster = 0; cluster < MaxClusterCount; cluster++)
328 		{
329 			data->occlusion[cluster] = 0;
330 		}
331 	}
332 
333 	// Viewport
334 	{
335 		const VkViewport &viewport = pipelineState.getViewport();
336 
337 		float W = 0.5f * viewport.width;
338 		float H = 0.5f * viewport.height;
339 		float X0 = viewport.x + W;
340 		float Y0 = viewport.y + H;
341 		float N = viewport.minDepth;
342 		float F = viewport.maxDepth;
343 		float Z = F - N;
344 		constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
345 
346 		data->WxF = float4(W * subPixF);
347 		data->HxF = float4(H * subPixF);
348 		data->X0xF = float4(X0 * subPixF - subPixF / 2);
349 		data->Y0xF = float4(Y0 * subPixF - subPixF / 2);
350 		data->halfPixelX = float4(0.5f / W);
351 		data->halfPixelY = float4(0.5f / H);
352 		data->viewportHeight = abs(viewport.height);
353 		data->depthRange = Z;
354 		data->depthNear = N;
355 		data->constantDepthBias = pipelineState.getConstantDepthBias();
356 		data->slopeDepthBias = pipelineState.getSlopeDepthBias();
357 		data->depthBiasClamp = pipelineState.getDepthBiasClamp();
358 		data->depthClipEnable = pipelineState.getDepthClipEnable();
359 
360 		const vk::Attachments attachments = pipeline->getAttachments();
361 		if(attachments.depthBuffer)
362 		{
363 			switch(attachments.depthBuffer->getFormat(VK_IMAGE_ASPECT_DEPTH_BIT))
364 			{
365 			case VK_FORMAT_D16_UNORM:
366 				data->minimumResolvableDepthDifference = 1.0f / 0xFFFF;
367 				break;
368 			case VK_FORMAT_D32_SFLOAT:
369 				// The minimum resolvable depth difference is determined per-polygon for floating-point depth
370 				// buffers. DrawData::minimumResolvableDepthDifference is unused.
371 				break;
372 			default:
373 				UNSUPPORTED("Depth format: %d", int(attachments.depthBuffer->getFormat(VK_IMAGE_ASPECT_DEPTH_BIT)));
374 			}
375 		}
376 	}
377 
378 	// Target
379 	{
380 		const vk::Attachments attachments = pipeline->getAttachments();
381 
382 		for(int index = 0; index < MAX_COLOR_BUFFERS; index++)
383 		{
384 			draw->colorBuffer[index] = attachments.colorBuffer[index];
385 
386 			if(draw->colorBuffer[index])
387 			{
388 				data->colorBuffer[index] = (unsigned int *)attachments.colorBuffer[index]->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_COLOR_BIT, 0, data->layer);
389 				data->colorPitchB[index] = attachments.colorBuffer[index]->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
390 				data->colorSliceB[index] = attachments.colorBuffer[index]->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
391 			}
392 		}
393 
394 		draw->depthBuffer = attachments.depthBuffer;
395 		draw->stencilBuffer = attachments.stencilBuffer;
396 
397 		if(draw->depthBuffer)
398 		{
399 			data->depthBuffer = (float *)attachments.depthBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_DEPTH_BIT, 0, data->layer);
400 			data->depthPitchB = attachments.depthBuffer->rowPitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
401 			data->depthSliceB = attachments.depthBuffer->slicePitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
402 		}
403 
404 		if(draw->stencilBuffer)
405 		{
406 			data->stencilBuffer = (unsigned char *)attachments.stencilBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_STENCIL_BIT, 0, data->layer);
407 			data->stencilPitchB = attachments.stencilBuffer->rowPitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
408 			data->stencilSliceB = attachments.stencilBuffer->slicePitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
409 		}
410 	}
411 
412 	// Scissor
413 	{
414 		const VkRect2D &scissor = pipelineState.getScissor();
415 
416 		int x0 = renderArea.offset.x;
417 		int y0 = renderArea.offset.y;
418 		int x1 = x0 + renderArea.extent.width;
419 		int y1 = y0 + renderArea.extent.height;
420 		data->scissorX0 = clamp<int>(scissor.offset.x, x0, x1);
421 		data->scissorX1 = clamp<int>(scissor.offset.x + scissor.extent.width, x0, x1);
422 		data->scissorY0 = clamp<int>(scissor.offset.y, y0, y1);
423 		data->scissorY1 = clamp<int>(scissor.offset.y + scissor.extent.height, y0, y1);
424 	}
425 
426 	// Push constants
427 	{
428 		data->pushConstants = pushConstants;
429 	}
430 
431 	draw->events = events;
432 
433 	vk::DescriptorSet::PrepareForSampling(draw->descriptorSetObjects, draw->pipelineLayout, device);
434 
435 	DrawCall::run(device, draw, &drawTickets, clusterQueues);
436 }
437 
setup()438 void DrawCall::setup()
439 {
440 	if(occlusionQuery != nullptr)
441 	{
442 		occlusionQuery->start();
443 	}
444 
445 	if(events)
446 	{
447 		events->add();
448 	}
449 }
450 
teardown(vk::Device * device)451 void DrawCall::teardown(vk::Device *device)
452 {
453 	if(events)
454 	{
455 		events->done();
456 		events = nullptr;
457 	}
458 
459 	if(occlusionQuery != nullptr)
460 	{
461 		for(int cluster = 0; cluster < MaxClusterCount; cluster++)
462 		{
463 			occlusionQuery->add(data->occlusion[cluster]);
464 		}
465 		occlusionQuery->finish();
466 	}
467 
468 	vertexRoutine = {};
469 	setupRoutine = {};
470 	pixelRoutine = {};
471 
472 	for(auto *target : colorBuffer)
473 	{
474 		if(target)
475 		{
476 			target->contentsChanged(vk::Image::DIRECT_MEMORY_ACCESS);
477 		}
478 	}
479 
480 	if(containsImageWrite)
481 	{
482 		vk::DescriptorSet::ContentsChanged(descriptorSetObjects, pipelineLayout, device);
483 	}
484 }
485 
run(vk::Device * device,const marl::Loan<DrawCall> & draw,marl::Ticket::Queue * tickets,marl::Ticket::Queue clusterQueues[MaxClusterCount])486 void DrawCall::run(vk::Device *device, const marl::Loan<DrawCall> &draw, marl::Ticket::Queue *tickets, marl::Ticket::Queue clusterQueues[MaxClusterCount])
487 {
488 	draw->setup();
489 
490 	auto const numPrimitives = draw->numPrimitives;
491 	auto const numPrimitivesPerBatch = draw->numPrimitivesPerBatch;
492 	auto const numBatches = draw->numBatches;
493 
494 	auto ticket = tickets->take();
495 	auto finally = marl::make_shared_finally([device, draw, ticket] {
496 		MARL_SCOPED_EVENT("FINISH draw %d", draw->id);
497 		draw->teardown(device);
498 		ticket.done();
499 	});
500 
501 	for(unsigned int batchId = 0; batchId < numBatches; batchId++)
502 	{
503 		auto batch = draw->batchDataPool->borrow();
504 		batch->id = batchId;
505 		batch->firstPrimitive = batch->id * numPrimitivesPerBatch;
506 		batch->numPrimitives = std::min(batch->firstPrimitive + numPrimitivesPerBatch, numPrimitives) - batch->firstPrimitive;
507 
508 		for(int cluster = 0; cluster < MaxClusterCount; cluster++)
509 		{
510 			batch->clusterTickets[cluster] = std::move(clusterQueues[cluster].take());
511 		}
512 
513 		marl::schedule([device, draw, batch, finally] {
514 			processVertices(device, draw.get(), batch.get());
515 
516 			if(!draw->setupState.rasterizerDiscard)
517 			{
518 				processPrimitives(device, draw.get(), batch.get());
519 
520 				if(batch->numVisible > 0)
521 				{
522 					processPixels(device, draw, batch, finally);
523 					return;
524 				}
525 			}
526 
527 			for(int cluster = 0; cluster < MaxClusterCount; cluster++)
528 			{
529 				batch->clusterTickets[cluster].done();
530 			}
531 		});
532 	}
533 }
534 
processVertices(vk::Device * device,DrawCall * draw,BatchData * batch)535 void DrawCall::processVertices(vk::Device *device, DrawCall *draw, BatchData *batch)
536 {
537 	MARL_SCOPED_EVENT("VERTEX draw %d, batch %d", draw->id, batch->id);
538 
539 	unsigned int triangleIndices[MaxBatchSize + 1][3];  // One extra for SIMD width overrun. TODO: Adjust to dynamic batch size.
540 	{
541 		MARL_SCOPED_EVENT("processPrimitiveVertices");
542 		processPrimitiveVertices(
543 		    triangleIndices,
544 		    draw->data->indices,
545 		    draw->indexType,
546 		    batch->firstPrimitive,
547 		    batch->numPrimitives,
548 		    draw->topology,
549 		    draw->provokingVertexMode);
550 	}
551 
552 	auto &vertexTask = batch->vertexTask;
553 	vertexTask.primitiveStart = batch->firstPrimitive;
554 	// We're only using batch compaction for points, not lines
555 	vertexTask.vertexCount = batch->numPrimitives * ((draw->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) ? 1 : 3);
556 	if(vertexTask.vertexCache.drawCall != draw->id)
557 	{
558 		vertexTask.vertexCache.clear();
559 		vertexTask.vertexCache.drawCall = draw->id;
560 	}
561 
562 	draw->vertexRoutine(device, &batch->triangles.front().v0, &triangleIndices[0][0], &vertexTask, draw->data);
563 }
564 
processPrimitives(vk::Device * device,DrawCall * draw,BatchData * batch)565 void DrawCall::processPrimitives(vk::Device *device, DrawCall *draw, BatchData *batch)
566 {
567 	MARL_SCOPED_EVENT("PRIMITIVES draw %d batch %d", draw->id, batch->id);
568 	auto triangles = &batch->triangles[0];
569 	auto primitives = &batch->primitives[0];
570 	batch->numVisible = draw->setupPrimitives(device, triangles, primitives, draw, batch->numPrimitives);
571 }
572 
processPixels(vk::Device * device,const marl::Loan<DrawCall> & draw,const marl::Loan<BatchData> & batch,const std::shared_ptr<marl::Finally> & finally)573 void DrawCall::processPixels(vk::Device *device, const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
574 {
575 	struct Data
576 	{
577 		Data(const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
578 		    : draw(draw)
579 		    , batch(batch)
580 		    , finally(finally)
581 		{}
582 		marl::Loan<DrawCall> draw;
583 		marl::Loan<BatchData> batch;
584 		std::shared_ptr<marl::Finally> finally;
585 	};
586 	auto data = std::make_shared<Data>(draw, batch, finally);
587 	for(int cluster = 0; cluster < MaxClusterCount; cluster++)
588 	{
589 		batch->clusterTickets[cluster].onCall([device, data, cluster] {
590 			auto &draw = data->draw;
591 			auto &batch = data->batch;
592 			MARL_SCOPED_EVENT("PIXEL draw %d, batch %d, cluster %d", draw->id, batch->id, cluster);
593 			draw->pixelRoutine(device, &batch->primitives.front(), batch->numVisible, cluster, MaxClusterCount, draw->data);
594 			batch->clusterTickets[cluster].done();
595 		});
596 	}
597 }
598 
synchronize()599 void Renderer::synchronize()
600 {
601 	MARL_SCOPED_EVENT("synchronize");
602 	auto ticket = drawTickets.take();
603 	ticket.wait();
604 	device->updateSamplingRoutineSnapshotCache();
605 	ticket.done();
606 }
607 
processPrimitiveVertices(unsigned int triangleIndicesOut[MaxBatchSize+1][3],const void * primitiveIndices,VkIndexType indexType,unsigned int start,unsigned int triangleCount,VkPrimitiveTopology topology,VkProvokingVertexModeEXT provokingVertexMode)608 void DrawCall::processPrimitiveVertices(
609     unsigned int triangleIndicesOut[MaxBatchSize + 1][3],
610     const void *primitiveIndices,
611     VkIndexType indexType,
612     unsigned int start,
613     unsigned int triangleCount,
614     VkPrimitiveTopology topology,
615     VkProvokingVertexModeEXT provokingVertexMode)
616 {
617 	if(!primitiveIndices)
618 	{
619 		struct LinearIndex
620 		{
621 			unsigned int operator[](unsigned int i) { return i; }
622 		};
623 
624 		if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, LinearIndex(), start, triangleCount))
625 		{
626 			return;
627 		}
628 	}
629 	else
630 	{
631 		switch(indexType)
632 		{
633 		case VK_INDEX_TYPE_UINT16:
634 			if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint16_t *>(primitiveIndices), start, triangleCount))
635 			{
636 				return;
637 			}
638 			break;
639 		case VK_INDEX_TYPE_UINT32:
640 			if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint32_t *>(primitiveIndices), start, triangleCount))
641 			{
642 				return;
643 			}
644 			break;
645 			break;
646 		default:
647 			ASSERT(false);
648 			return;
649 		}
650 	}
651 
652 	// setBatchIndices() takes care of the point case, since it's different due to the compaction
653 	if(topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
654 	{
655 		// Repeat the last index to allow for SIMD width overrun.
656 		triangleIndicesOut[triangleCount][0] = triangleIndicesOut[triangleCount - 1][2];
657 		triangleIndicesOut[triangleCount][1] = triangleIndicesOut[triangleCount - 1][2];
658 		triangleIndicesOut[triangleCount][2] = triangleIndicesOut[triangleCount - 1][2];
659 	}
660 }
661 
setupSolidTriangles(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)662 int DrawCall::setupSolidTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
663 {
664 	auto &state = drawCall->setupState;
665 
666 	int ms = state.multiSampleCount;
667 	const DrawData *data = drawCall->data;
668 	int visible = 0;
669 
670 	for(int i = 0; i < count; i++, triangles++)
671 	{
672 		Vertex &v0 = triangles->v0;
673 		Vertex &v1 = triangles->v1;
674 		Vertex &v2 = triangles->v2;
675 
676 		Polygon polygon(&v0.position, &v1.position, &v2.position);
677 
678 		if((v0.cullMask | v1.cullMask | v2.cullMask) == 0)
679 		{
680 			continue;
681 		}
682 
683 		if((v0.clipFlags & v1.clipFlags & v2.clipFlags) != Clipper::CLIP_FINITE)
684 		{
685 			continue;
686 		}
687 
688 		int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags;
689 		if(clipFlagsOr != Clipper::CLIP_FINITE)
690 		{
691 			if(!Clipper::Clip(polygon, clipFlagsOr, *drawCall))
692 			{
693 				continue;
694 			}
695 		}
696 
697 		if(drawCall->setupRoutine(device, primitives, triangles, &polygon, data))
698 		{
699 			primitives += ms;
700 			visible++;
701 		}
702 	}
703 
704 	return visible;
705 }
706 
setupWireframeTriangles(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)707 int DrawCall::setupWireframeTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
708 {
709 	auto &state = drawCall->setupState;
710 
711 	int ms = state.multiSampleCount;
712 	int visible = 0;
713 
714 	for(int i = 0; i < count; i++)
715 	{
716 		const Vertex &v0 = triangles[i].v0;
717 		const Vertex &v1 = triangles[i].v1;
718 		const Vertex &v2 = triangles[i].v2;
719 
720 		float A = ((float)v0.projected.y - (float)v2.projected.y) * (float)v1.projected.x +
721 		          ((float)v2.projected.y - (float)v1.projected.y) * (float)v0.projected.x +
722 		          ((float)v1.projected.y - (float)v0.projected.y) * (float)v2.projected.x;  // Area
723 
724 		int w0w1w2 = bit_cast<int>(v0.w) ^
725 		             bit_cast<int>(v1.w) ^
726 		             bit_cast<int>(v2.w);
727 
728 		A = w0w1w2 < 0 ? -A : A;
729 
730 		bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (A >= 0.0f) : (A <= 0.0f);
731 
732 		if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
733 		{
734 			if(frontFacing) continue;
735 		}
736 		if(state.cullMode & VK_CULL_MODE_BACK_BIT)
737 		{
738 			if(!frontFacing) continue;
739 		}
740 
741 		Triangle lines[3];
742 		lines[0].v0 = v0;
743 		lines[0].v1 = v1;
744 		lines[1].v0 = v1;
745 		lines[1].v1 = v2;
746 		lines[2].v0 = v2;
747 		lines[2].v1 = v0;
748 
749 		for(int i = 0; i < 3; i++)
750 		{
751 			if(setupLine(device, *primitives, lines[i], *drawCall))
752 			{
753 				primitives += ms;
754 				visible++;
755 			}
756 		}
757 	}
758 
759 	return visible;
760 }
761 
setupPointTriangles(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)762 int DrawCall::setupPointTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
763 {
764 	auto &state = drawCall->setupState;
765 
766 	int ms = state.multiSampleCount;
767 	int visible = 0;
768 
769 	for(int i = 0; i < count; i++)
770 	{
771 		const Vertex &v0 = triangles[i].v0;
772 		const Vertex &v1 = triangles[i].v1;
773 		const Vertex &v2 = triangles[i].v2;
774 
775 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w +
776 		          (v0.x * v2.y - v0.y * v2.x) * v1.w +
777 		          (v2.x * v1.y - v1.x * v2.y) * v0.w;
778 
779 		bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0);
780 		if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
781 		{
782 			if(frontFacing) continue;
783 		}
784 		if(state.cullMode & VK_CULL_MODE_BACK_BIT)
785 		{
786 			if(!frontFacing) continue;
787 		}
788 
789 		Triangle points[3];
790 		points[0].v0 = v0;
791 		points[1].v0 = v1;
792 		points[2].v0 = v2;
793 
794 		for(int i = 0; i < 3; i++)
795 		{
796 			if(setupPoint(device, *primitives, points[i], *drawCall))
797 			{
798 				primitives += ms;
799 				visible++;
800 			}
801 		}
802 	}
803 
804 	return visible;
805 }
806 
setupLines(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)807 int DrawCall::setupLines(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
808 {
809 	auto &state = drawCall->setupState;
810 
811 	int visible = 0;
812 	int ms = state.multiSampleCount;
813 
814 	for(int i = 0; i < count; i++)
815 	{
816 		if(setupLine(device, *primitives, *triangles, *drawCall))
817 		{
818 			primitives += ms;
819 			visible++;
820 		}
821 
822 		triangles++;
823 	}
824 
825 	return visible;
826 }
827 
setupPoints(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)828 int DrawCall::setupPoints(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
829 {
830 	auto &state = drawCall->setupState;
831 
832 	int visible = 0;
833 	int ms = state.multiSampleCount;
834 
835 	for(int i = 0; i < count; i++)
836 	{
837 		if(setupPoint(device, *primitives, *triangles, *drawCall))
838 		{
839 			primitives += ms;
840 			visible++;
841 		}
842 
843 		triangles++;
844 	}
845 
846 	return visible;
847 }
848 
setupLine(vk::Device * device,Primitive & primitive,Triangle & triangle,const DrawCall & draw)849 bool DrawCall::setupLine(vk::Device *device, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
850 {
851 	const DrawData &data = *draw.data;
852 
853 	float lineWidth = data.lineWidth;
854 
855 	Vertex &v0 = triangle.v0;
856 	Vertex &v1 = triangle.v1;
857 
858 	if((v0.cullMask | v1.cullMask) == 0)
859 	{
860 		return false;
861 	}
862 
863 	const float4 &P0 = v0.position;
864 	const float4 &P1 = v1.position;
865 
866 	if(P0.w <= 0 && P1.w <= 0)
867 	{
868 		return false;
869 	}
870 
871 	constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
872 
873 	const float W = data.WxF[0] * (1.0f / subPixF);
874 	const float H = data.HxF[0] * (1.0f / subPixF);
875 
876 	float dx = W * (P1.x / P1.w - P0.x / P0.w);
877 	float dy = H * (P1.y / P1.w - P0.y / P0.w);
878 
879 	if(dx == 0 && dy == 0)
880 	{
881 		return false;
882 	}
883 
884 	if(draw.lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT)
885 	{
886 		// Rectangle centered on the line segment
887 
888 		float4 P[4];
889 		int C[4];
890 
891 		P[0] = P0;
892 		P[1] = P1;
893 		P[2] = P1;
894 		P[3] = P0;
895 
896 		float scale = lineWidth * 0.5f / sqrt(dx * dx + dy * dy);
897 
898 		dx *= scale;
899 		dy *= scale;
900 
901 		float dx0h = dx * P0.w / H;
902 		float dy0w = dy * P0.w / W;
903 
904 		float dx1h = dx * P1.w / H;
905 		float dy1w = dy * P1.w / W;
906 
907 		P[0].x += -dy0w;
908 		P[0].y += +dx0h;
909 		C[0] = Clipper::ComputeClipFlags(P[0], draw.depthClipEnable);
910 
911 		P[1].x += -dy1w;
912 		P[1].y += +dx1h;
913 		C[1] = Clipper::ComputeClipFlags(P[1], draw.depthClipEnable);
914 
915 		P[2].x += +dy1w;
916 		P[2].y += -dx1h;
917 		C[2] = Clipper::ComputeClipFlags(P[2], draw.depthClipEnable);
918 
919 		P[3].x += +dy0w;
920 		P[3].y += -dx0h;
921 		C[3] = Clipper::ComputeClipFlags(P[3], draw.depthClipEnable);
922 
923 		if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
924 		{
925 			Polygon polygon(P, 4);
926 
927 			int clipFlagsOr = C[0] | C[1] | C[2] | C[3];
928 
929 			if(clipFlagsOr != Clipper::CLIP_FINITE)
930 			{
931 				if(!Clipper::Clip(polygon, clipFlagsOr, draw))
932 				{
933 					return false;
934 				}
935 			}
936 
937 			return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
938 		}
939 	}
940 	else if(false)  // TODO(b/80135519): Deprecate
941 	{
942 		// Connecting diamonds polygon
943 		// This shape satisfies the diamond test convention, except for the exit rule part.
944 		// Line segments with overlapping endpoints have duplicate fragments.
945 		// The ideal algorithm requires half-open line rasterization (b/80135519).
946 
947 		float4 P[8];
948 		int C[8];
949 
950 		P[0] = P0;
951 		P[1] = P0;
952 		P[2] = P0;
953 		P[3] = P0;
954 		P[4] = P1;
955 		P[5] = P1;
956 		P[6] = P1;
957 		P[7] = P1;
958 
959 		float dx0 = lineWidth * 0.5f * P0.w / W;
960 		float dy0 = lineWidth * 0.5f * P0.w / H;
961 
962 		float dx1 = lineWidth * 0.5f * P1.w / W;
963 		float dy1 = lineWidth * 0.5f * P1.w / H;
964 
965 		P[0].x += -dx0;
966 		C[0] = Clipper::ComputeClipFlags(P[0], draw.depthClipEnable);
967 
968 		P[1].y += +dy0;
969 		C[1] = Clipper::ComputeClipFlags(P[1], draw.depthClipEnable);
970 
971 		P[2].x += +dx0;
972 		C[2] = Clipper::ComputeClipFlags(P[2], draw.depthClipEnable);
973 
974 		P[3].y += -dy0;
975 		C[3] = Clipper::ComputeClipFlags(P[3], draw.depthClipEnable);
976 
977 		P[4].x += -dx1;
978 		C[4] = Clipper::ComputeClipFlags(P[4], draw.depthClipEnable);
979 
980 		P[5].y += +dy1;
981 		C[5] = Clipper::ComputeClipFlags(P[5], draw.depthClipEnable);
982 
983 		P[6].x += +dx1;
984 		C[6] = Clipper::ComputeClipFlags(P[6], draw.depthClipEnable);
985 
986 		P[7].y += -dy1;
987 		C[7] = Clipper::ComputeClipFlags(P[7], draw.depthClipEnable);
988 
989 		if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
990 		{
991 			float4 L[6];
992 
993 			if(dx > -dy)
994 			{
995 				if(dx > dy)  // Right
996 				{
997 					L[0] = P[0];
998 					L[1] = P[1];
999 					L[2] = P[5];
1000 					L[3] = P[6];
1001 					L[4] = P[7];
1002 					L[5] = P[3];
1003 				}
1004 				else  // Down
1005 				{
1006 					L[0] = P[0];
1007 					L[1] = P[4];
1008 					L[2] = P[5];
1009 					L[3] = P[6];
1010 					L[4] = P[2];
1011 					L[5] = P[3];
1012 				}
1013 			}
1014 			else
1015 			{
1016 				if(dx > dy)  // Up
1017 				{
1018 					L[0] = P[0];
1019 					L[1] = P[1];
1020 					L[2] = P[2];
1021 					L[3] = P[6];
1022 					L[4] = P[7];
1023 					L[5] = P[4];
1024 				}
1025 				else  // Left
1026 				{
1027 					L[0] = P[1];
1028 					L[1] = P[2];
1029 					L[2] = P[3];
1030 					L[3] = P[7];
1031 					L[4] = P[4];
1032 					L[5] = P[5];
1033 				}
1034 			}
1035 
1036 			Polygon polygon(L, 6);
1037 
1038 			int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7];
1039 
1040 			if(clipFlagsOr != Clipper::CLIP_FINITE)
1041 			{
1042 				if(!Clipper::Clip(polygon, clipFlagsOr, draw))
1043 				{
1044 					return false;
1045 				}
1046 			}
1047 
1048 			return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
1049 		}
1050 	}
1051 	else
1052 	{
1053 		// Parallelogram approximating Bresenham line
1054 		// This algorithm does not satisfy the ideal diamond-exit rule, but does avoid the
1055 		// duplicate fragment rasterization problem and satisfies all of Vulkan's minimum
1056 		// requirements for Bresenham line segment rasterization.
1057 
1058 		float4 P[8];
1059 		P[0] = P0;
1060 		P[1] = P0;
1061 		P[2] = P0;
1062 		P[3] = P0;
1063 		P[4] = P1;
1064 		P[5] = P1;
1065 		P[6] = P1;
1066 		P[7] = P1;
1067 
1068 		float dx0 = lineWidth * 0.5f * P0.w / W;
1069 		float dy0 = lineWidth * 0.5f * P0.w / H;
1070 
1071 		float dx1 = lineWidth * 0.5f * P1.w / W;
1072 		float dy1 = lineWidth * 0.5f * P1.w / H;
1073 
1074 		P[0].x += -dx0;
1075 		P[1].y += +dy0;
1076 		P[2].x += +dx0;
1077 		P[3].y += -dy0;
1078 		P[4].x += -dx1;
1079 		P[5].y += +dy1;
1080 		P[6].x += +dx1;
1081 		P[7].y += -dy1;
1082 
1083 		float4 L[4];
1084 
1085 		if(dx > -dy)
1086 		{
1087 			if(dx > dy)  // Right
1088 			{
1089 				L[0] = P[1];
1090 				L[1] = P[5];
1091 				L[2] = P[7];
1092 				L[3] = P[3];
1093 			}
1094 			else  // Down
1095 			{
1096 				L[0] = P[0];
1097 				L[1] = P[4];
1098 				L[2] = P[6];
1099 				L[3] = P[2];
1100 			}
1101 		}
1102 		else
1103 		{
1104 			if(dx > dy)  // Up
1105 			{
1106 				L[0] = P[0];
1107 				L[1] = P[2];
1108 				L[2] = P[6];
1109 				L[3] = P[4];
1110 			}
1111 			else  // Left
1112 			{
1113 				L[0] = P[1];
1114 				L[1] = P[3];
1115 				L[2] = P[7];
1116 				L[3] = P[5];
1117 			}
1118 		}
1119 
1120 		int C0 = Clipper::ComputeClipFlags(L[0], draw.depthClipEnable);
1121 		int C1 = Clipper::ComputeClipFlags(L[1], draw.depthClipEnable);
1122 		int C2 = Clipper::ComputeClipFlags(L[2], draw.depthClipEnable);
1123 		int C3 = Clipper::ComputeClipFlags(L[3], draw.depthClipEnable);
1124 
1125 		if((C0 & C1 & C2 & C3) == Clipper::CLIP_FINITE)
1126 		{
1127 			Polygon polygon(L, 4);
1128 
1129 			int clipFlagsOr = C0 | C1 | C2 | C3;
1130 
1131 			if(clipFlagsOr != Clipper::CLIP_FINITE)
1132 			{
1133 				if(!Clipper::Clip(polygon, clipFlagsOr, draw))
1134 				{
1135 					return false;
1136 				}
1137 			}
1138 
1139 			return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
1140 		}
1141 	}
1142 
1143 	return false;
1144 }
1145 
setupPoint(vk::Device * device,Primitive & primitive,Triangle & triangle,const DrawCall & draw)1146 bool DrawCall::setupPoint(vk::Device *device, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1147 {
1148 	const DrawData &data = *draw.data;
1149 
1150 	Vertex &v = triangle.v0;
1151 
1152 	if(v.cullMask == 0)
1153 	{
1154 		return false;
1155 	}
1156 
1157 	float pSize = v.pointSize;
1158 
1159 	pSize = clamp(pSize, 1.0f, static_cast<float>(vk::MAX_POINT_SIZE));
1160 
1161 	float4 P[4];
1162 	int C[4];
1163 
1164 	P[0] = v.position;
1165 	P[1] = v.position;
1166 	P[2] = v.position;
1167 	P[3] = v.position;
1168 
1169 	const float X = pSize * P[0].w * data.halfPixelX[0];
1170 	const float Y = pSize * P[0].w * data.halfPixelY[0];
1171 
1172 	P[0].x -= X;
1173 	P[0].y += Y;
1174 	C[0] = Clipper::ComputeClipFlags(P[0], draw.depthClipEnable);
1175 
1176 	P[1].x += X;
1177 	P[1].y += Y;
1178 	C[1] = Clipper::ComputeClipFlags(P[1], draw.depthClipEnable);
1179 
1180 	P[2].x += X;
1181 	P[2].y -= Y;
1182 	C[2] = Clipper::ComputeClipFlags(P[2], draw.depthClipEnable);
1183 
1184 	P[3].x -= X;
1185 	P[3].y -= Y;
1186 	C[3] = Clipper::ComputeClipFlags(P[3], draw.depthClipEnable);
1187 
1188 	Polygon polygon(P, 4);
1189 
1190 	if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1191 	{
1192 		int clipFlagsOr = C[0] | C[1] | C[2] | C[3];
1193 
1194 		if(clipFlagsOr != Clipper::CLIP_FINITE)
1195 		{
1196 			if(!Clipper::Clip(polygon, clipFlagsOr, draw))
1197 			{
1198 				return false;
1199 			}
1200 		}
1201 
1202 		primitive.pointSizeInv = 1.0f / pSize;
1203 
1204 		return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
1205 	}
1206 
1207 	return false;
1208 }
1209 
addQuery(vk::Query * query)1210 void Renderer::addQuery(vk::Query *query)
1211 {
1212 	ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
1213 	ASSERT(!occlusionQuery);
1214 
1215 	occlusionQuery = query;
1216 }
1217 
removeQuery(vk::Query * query)1218 void Renderer::removeQuery(vk::Query *query)
1219 {
1220 	ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
1221 	ASSERT(occlusionQuery == query);
1222 
1223 	occlusionQuery = nullptr;
1224 }
1225 
1226 }  // namespace sw
1227