1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Renderer.hpp"
16
17 #include "Clipper.hpp"
18 #include "Polygon.hpp"
19 #include "Primitive.hpp"
20 #include "Vertex.hpp"
21 #include "Pipeline/Constants.hpp"
22 #include "Pipeline/SpirvShader.hpp"
23 #include "Reactor/Reactor.hpp"
24 #include "System/Debug.hpp"
25 #include "System/Half.hpp"
26 #include "System/Math.hpp"
27 #include "System/Memory.hpp"
28 #include "System/Timer.hpp"
29 #include "Vulkan/VkConfig.hpp"
30 #include "Vulkan/VkDescriptorSet.hpp"
31 #include "Vulkan/VkDevice.hpp"
32 #include "Vulkan/VkFence.hpp"
33 #include "Vulkan/VkImageView.hpp"
34 #include "Vulkan/VkPipelineLayout.hpp"
35 #include "Vulkan/VkQueryPool.hpp"
36
37 #include "marl/containers.h"
38 #include "marl/defer.h"
39 #include "marl/trace.h"
40
41 #undef max
42
#ifndef NDEBUG
// Debug-only globals; compiled out when NDEBUG is defined. Nothing in the visible part of
// this file reads them.
// NOTE(review): presumably primitive-count bounds meant to be adjusted from a debugger to
// isolate individual draws - confirm against the code that consumes them.
unsigned int minPrimitives = 1u;
unsigned int maxPrimitives = 1u << 21;
#endif
47
48 namespace sw {
49
50 template<typename T>
setBatchIndices(unsigned int batch[128][3],VkPrimitiveTopology topology,VkProvokingVertexModeEXT provokingVertexMode,T indices,unsigned int start,unsigned int triangleCount)51 inline bool setBatchIndices(unsigned int batch[128][3], VkPrimitiveTopology topology, VkProvokingVertexModeEXT provokingVertexMode, T indices, unsigned int start, unsigned int triangleCount)
52 {
53 bool provokeFirst = (provokingVertexMode == VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT);
54
55 switch(topology)
56 {
57 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
58 {
59 auto index = start;
60 auto pointBatch = &(batch[0][0]);
61 for(unsigned int i = 0; i < triangleCount; i++)
62 {
63 *pointBatch++ = indices[index++];
64 }
65
66 // Repeat the last index to allow for SIMD width overrun.
67 index--;
68 for(unsigned int i = 0; i < 3; i++)
69 {
70 *pointBatch++ = indices[index];
71 }
72 }
73 break;
74 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
75 {
76 auto index = 2 * start;
77 for(unsigned int i = 0; i < triangleCount; i++)
78 {
79 batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
80 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
81 batch[i][2] = indices[index + 1];
82
83 index += 2;
84 }
85 }
86 break;
87 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
88 {
89 auto index = start;
90 for(unsigned int i = 0; i < triangleCount; i++)
91 {
92 batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
93 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
94 batch[i][2] = indices[index + 1];
95
96 index += 1;
97 }
98 }
99 break;
100 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
101 {
102 auto index = 3 * start;
103 for(unsigned int i = 0; i < triangleCount; i++)
104 {
105 batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
106 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
107 batch[i][2] = indices[index + (provokeFirst ? 2 : 1)];
108
109 index += 3;
110 }
111 }
112 break;
113 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
114 {
115 auto index = start;
116 for(unsigned int i = 0; i < triangleCount; i++)
117 {
118 batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
119 batch[i][1] = indices[index + ((start + i) & 1) + (provokeFirst ? 1 : 0)];
120 batch[i][2] = indices[index + (~(start + i) & 1) + (provokeFirst ? 1 : 0)];
121
122 index += 1;
123 }
124 }
125 break;
126 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
127 {
128 auto index = start + 1;
129 for(unsigned int i = 0; i < triangleCount; i++)
130 {
131 batch[i][provokeFirst ? 0 : 2] = indices[index + 0];
132 batch[i][provokeFirst ? 1 : 0] = indices[index + 1];
133 batch[i][provokeFirst ? 2 : 1] = indices[0];
134
135 index += 1;
136 }
137 }
138 break;
139 default:
140 ASSERT(false);
141 return false;
142 }
143
144 return true;
145 }
146
DrawCall()147 DrawCall::DrawCall()
148 {
149 // TODO(b/140991626): Use allocateUninitialized() instead of allocateZeroOrPoison() to improve startup peformance.
150 data = (DrawData *)sw::allocateZeroOrPoison(sizeof(DrawData));
151 }
152
~DrawCall()153 DrawCall::~DrawCall()
154 {
155 sw::freeMemory(data);
156 }
157
Renderer(vk::Device * device)158 Renderer::Renderer(vk::Device *device)
159 : device(device)
160 {
161 vertexProcessor.setRoutineCacheSize(1024);
162 pixelProcessor.setRoutineCacheSize(1024);
163 setupProcessor.setRoutineCacheSize(1024);
164 }
165
~Renderer()166 Renderer::~Renderer()
167 {
168 drawTickets.take().wait();
169 }
170
171 // Renderer objects have to be mem aligned to the alignment provided in the class declaration
operator new(size_t size)172 void *Renderer::operator new(size_t size)
173 {
174 ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
175 return vk::allocateHostMemory(sizeof(Renderer), alignof(Renderer), vk::NULL_ALLOCATION_CALLBACKS, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
176 }
177
operator delete(void * mem)178 void Renderer::operator delete(void *mem)
179 {
180 vk::freeHostMemory(mem, vk::NULL_ALLOCATION_CALLBACKS);
181 }
182
draw(const vk::GraphicsPipeline * pipeline,const vk::DynamicState & dynamicState,unsigned int count,int baseVertex,CountedEvent * events,int instanceID,int layer,void * indexBuffer,const VkRect2D & renderArea,const vk::Pipeline::PushConstantStorage & pushConstants,bool update)183 void Renderer::draw(const vk::GraphicsPipeline *pipeline, const vk::DynamicState &dynamicState, unsigned int count, int baseVertex,
184 CountedEvent *events, int instanceID, int layer, void *indexBuffer, const VkRect2D &renderArea,
185 const vk::Pipeline::PushConstantStorage &pushConstants, bool update)
186 {
187 if(count == 0) { return; }
188
189 auto id = nextDrawID++;
190 MARL_SCOPED_EVENT("draw %d", id);
191
192 marl::Pool<sw::DrawCall>::Loan draw;
193 {
194 MARL_SCOPED_EVENT("drawCallPool.borrow()");
195 draw = drawCallPool.borrow();
196 }
197 draw->id = id;
198
199 const vk::GraphicsState &pipelineState = pipeline->getCombinedState(dynamicState);
200
201 // A graphics pipeline must always be "complete" before it can be used for drawing. A
202 // complete graphics pipeline always includes the vertex input interface and
203 // pre-rasterization subsets, but only includes fragment and fragment output interface
204 // subsets if rasterizer discard is not enabled.
205 //
206 // Note that in the following, the setupPrimitives, setupRoutine and pixelRoutine functions
207 // are only called when rasterizer discard is not enabled. If rasterizer discard is
208 // enabled, these functions and state for the latter two states are not set.
209 const vk::VertexInputInterfaceState &vertexInputInterfaceState = pipelineState.getVertexInputInterfaceState();
210 const vk::PreRasterizationState &preRasterizationState = pipelineState.getPreRasterizationState();
211 const vk::FragmentState *fragmentState = nullptr;
212 const vk::FragmentOutputInterfaceState *fragmentOutputInterfaceState = nullptr;
213
214 const bool hasRasterizerDiscard = preRasterizationState.hasRasterizerDiscard();
215 if(!hasRasterizerDiscard)
216 {
217 fragmentState = &pipelineState.getFragmentState();
218 fragmentOutputInterfaceState = &pipelineState.getFragmentOutputInterfaceState();
219
220 pixelProcessor.setBlendConstant(fragmentOutputInterfaceState->getBlendConstants());
221 }
222
223 const vk::Inputs &inputs = pipeline->getInputs();
224
225 if(update)
226 {
227 MARL_SCOPED_EVENT("update");
228
229 const sw::SpirvShader *fragmentShader = pipeline->getShader(VK_SHADER_STAGE_FRAGMENT_BIT).get();
230 const sw::SpirvShader *vertexShader = pipeline->getShader(VK_SHADER_STAGE_VERTEX_BIT).get();
231
232 const vk::Attachments attachments = pipeline->getAttachments();
233
234 vertexState = vertexProcessor.update(pipelineState, vertexShader, inputs);
235 vertexRoutine = vertexProcessor.routine(vertexState, preRasterizationState.getPipelineLayout(), vertexShader, inputs.getDescriptorSets());
236
237 if(!hasRasterizerDiscard)
238 {
239 setupState = setupProcessor.update(pipelineState, fragmentShader, vertexShader, attachments);
240 setupRoutine = setupProcessor.routine(setupState);
241
242 pixelState = pixelProcessor.update(pipelineState, fragmentShader, vertexShader, attachments, hasOcclusionQuery());
243 pixelRoutine = pixelProcessor.routine(pixelState, fragmentState->getPipelineLayout(), fragmentShader, inputs.getDescriptorSets());
244 }
245 }
246
247 draw->preRasterizationContainsImageWrite = pipeline->preRasterizationContainsImageWrite();
248 draw->fragmentContainsImageWrite = pipeline->fragmentContainsImageWrite();
249
250 // The sample count affects the batch size even if rasterization is disabled.
251 // TODO(b/147812380): Eliminate the dependency between multisampling and batch size.
252 int ms = hasRasterizerDiscard ? 1 : fragmentOutputInterfaceState->getSampleCount();
253 ASSERT(ms > 0);
254
255 unsigned int numPrimitivesPerBatch = MaxBatchSize / ms;
256
257 DrawData *data = draw->data;
258 draw->occlusionQuery = occlusionQuery;
259 draw->batchDataPool = &batchDataPool;
260 draw->numPrimitives = count;
261 draw->numPrimitivesPerBatch = numPrimitivesPerBatch;
262 draw->numBatches = (count + draw->numPrimitivesPerBatch - 1) / draw->numPrimitivesPerBatch;
263 draw->topology = vertexInputInterfaceState.getTopology();
264 draw->provokingVertexMode = preRasterizationState.getProvokingVertexMode();
265 draw->lineRasterizationMode = preRasterizationState.getLineRasterizationMode();
266 draw->descriptorSetObjects = inputs.getDescriptorSetObjects();
267 draw->preRasterizationPipelineLayout = preRasterizationState.getPipelineLayout();
268 draw->depthClipEnable = preRasterizationState.getDepthClipEnable();
269 draw->depthClipNegativeOneToOne = preRasterizationState.getDepthClipNegativeOneToOne();
270 data->lineWidth = preRasterizationState.getLineWidth();
271 data->rasterizerDiscard = hasRasterizerDiscard;
272
273 data->descriptorSets = inputs.getDescriptorSets();
274 data->descriptorDynamicOffsets = inputs.getDescriptorDynamicOffsets();
275
276 for(int i = 0; i < MAX_INTERFACE_COMPONENTS / 4; i++)
277 {
278 const sw::Stream &stream = inputs.getStream(i);
279 data->input[i] = stream.buffer;
280 data->robustnessSize[i] = stream.robustnessSize;
281 data->stride[i] = inputs.getVertexStride(i, vertexInputInterfaceState.hasDynamicVertexStride());
282 }
283
284 data->indices = indexBuffer;
285 data->layer = layer;
286 data->instanceID = instanceID;
287 data->baseVertex = baseVertex;
288
289 if(indexBuffer)
290 {
291 draw->indexType = pipeline->getIndexBuffer().getIndexType();
292 }
293
294 draw->vertexRoutine = vertexRoutine;
295
296 vk::DescriptorSet::PrepareForSampling(draw->descriptorSetObjects, draw->preRasterizationPipelineLayout, device);
297
298 // Viewport
299 {
300 const VkViewport &viewport = preRasterizationState.getViewport();
301
302 float W = 0.5f * viewport.width;
303 float H = 0.5f * viewport.height;
304 float X0 = viewport.x + W;
305 float Y0 = viewport.y + H;
306 float N = viewport.minDepth;
307 float F = viewport.maxDepth;
308 float Z = F - N;
309 constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
310
311 data->WxF = W * subPixF;
312 data->HxF = H * subPixF;
313 data->X0xF = X0 * subPixF - subPixF / 2;
314 data->Y0xF = Y0 * subPixF - subPixF / 2;
315 data->halfPixelX = 0.5f / W;
316 data->halfPixelY = 0.5f / H;
317 data->depthRange = Z;
318 data->depthNear = N;
319 data->constantDepthBias = preRasterizationState.getConstantDepthBias();
320 data->slopeDepthBias = preRasterizationState.getSlopeDepthBias();
321 data->depthBiasClamp = preRasterizationState.getDepthBiasClamp();
322
323 // Adjust viewport transform based on the negativeOneToOne state.
324 if(preRasterizationState.getDepthClipNegativeOneToOne())
325 {
326 data->depthRange = Z * 0.5f;
327 data->depthNear = (F + N) * 0.5f;
328 }
329 }
330
331 // Scissor
332 {
333 const VkRect2D &scissor = preRasterizationState.getScissor();
334
335 int x0 = renderArea.offset.x;
336 int y0 = renderArea.offset.y;
337 int x1 = x0 + renderArea.extent.width;
338 int y1 = y0 + renderArea.extent.height;
339 data->scissorX0 = clamp<int>(scissor.offset.x, x0, x1);
340 data->scissorX1 = clamp<int>(scissor.offset.x + scissor.extent.width, x0, x1);
341 data->scissorY0 = clamp<int>(scissor.offset.y, y0, y1);
342 data->scissorY1 = clamp<int>(scissor.offset.y + scissor.extent.height, y0, y1);
343 }
344
345 if(!hasRasterizerDiscard)
346 {
347 const VkPolygonMode polygonMode = preRasterizationState.getPolygonMode();
348
349 DrawCall::SetupFunction setupPrimitives = nullptr;
350 if(vertexInputInterfaceState.isDrawTriangle(false, polygonMode))
351 {
352 switch(preRasterizationState.getPolygonMode())
353 {
354 case VK_POLYGON_MODE_FILL:
355 setupPrimitives = &DrawCall::setupSolidTriangles;
356 break;
357 case VK_POLYGON_MODE_LINE:
358 setupPrimitives = &DrawCall::setupWireframeTriangles;
359 numPrimitivesPerBatch /= 3;
360 break;
361 case VK_POLYGON_MODE_POINT:
362 setupPrimitives = &DrawCall::setupPointTriangles;
363 numPrimitivesPerBatch /= 3;
364 break;
365 default:
366 UNSUPPORTED("polygon mode: %d", int(preRasterizationState.getPolygonMode()));
367 return;
368 }
369 }
370 else if(vertexInputInterfaceState.isDrawLine(false, polygonMode))
371 {
372 setupPrimitives = &DrawCall::setupLines;
373 }
374 else // Point primitive topology
375 {
376 setupPrimitives = &DrawCall::setupPoints;
377 }
378
379 draw->setupState = setupState;
380 draw->setupRoutine = setupRoutine;
381 draw->pixelRoutine = pixelRoutine;
382 draw->setupPrimitives = setupPrimitives;
383 draw->fragmentPipelineLayout = fragmentState->getPipelineLayout();
384
385 if(pixelState.stencilActive)
386 {
387 data->stencil[0].set(fragmentState->getFrontStencil().reference, fragmentState->getFrontStencil().compareMask, fragmentState->getFrontStencil().writeMask);
388 data->stencil[1].set(fragmentState->getBackStencil().reference, fragmentState->getBackStencil().compareMask, fragmentState->getBackStencil().writeMask);
389 }
390
391 data->factor = pixelProcessor.factor;
392
393 if(pixelState.alphaToCoverage)
394 {
395 if(ms == 4)
396 {
397 data->a2c0 = 0.2f;
398 data->a2c1 = 0.4f;
399 data->a2c2 = 0.6f;
400 data->a2c3 = 0.8f;
401 }
402 else if(ms == 2)
403 {
404 data->a2c0 = 0.25f;
405 data->a2c1 = 0.75f;
406 }
407 else if(ms == 1)
408 {
409 data->a2c0 = 0.5f;
410 }
411 else
412 ASSERT(false);
413 }
414
415 if(pixelState.occlusionEnabled)
416 {
417 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
418 {
419 data->occlusion[cluster] = 0;
420 }
421 }
422
423 // Viewport
424 {
425 const vk::Attachments attachments = pipeline->getAttachments();
426 if(attachments.depthBuffer)
427 {
428 switch(attachments.depthBuffer->getFormat(VK_IMAGE_ASPECT_DEPTH_BIT))
429 {
430 case VK_FORMAT_D16_UNORM:
431 // Minimum is 1 unit, but account for potential floating-point rounding errors
432 data->minimumResolvableDepthDifference = 1.01f / 0xFFFF;
433 break;
434 case VK_FORMAT_D32_SFLOAT:
435 // The minimum resolvable depth difference is determined per-polygon for floating-point depth
436 // buffers. DrawData::minimumResolvableDepthDifference is unused.
437 break;
438 default:
439 UNSUPPORTED("Depth format: %d", int(attachments.depthBuffer->getFormat(VK_IMAGE_ASPECT_DEPTH_BIT)));
440 }
441 }
442 }
443
444 // Target
445 {
446 const vk::Attachments attachments = pipeline->getAttachments();
447
448 for(int index = 0; index < MAX_COLOR_BUFFERS; index++)
449 {
450 draw->colorBuffer[index] = attachments.colorBuffer[index];
451
452 if(draw->colorBuffer[index])
453 {
454 data->colorBuffer[index] = (unsigned int *)attachments.colorBuffer[index]->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_COLOR_BIT, 0, data->layer);
455 data->colorPitchB[index] = attachments.colorBuffer[index]->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
456 data->colorSliceB[index] = attachments.colorBuffer[index]->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
457 }
458 }
459
460 draw->depthBuffer = attachments.depthBuffer;
461 draw->stencilBuffer = attachments.stencilBuffer;
462
463 if(draw->depthBuffer)
464 {
465 data->depthBuffer = (float *)attachments.depthBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_DEPTH_BIT, 0, data->layer);
466 data->depthPitchB = attachments.depthBuffer->rowPitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
467 data->depthSliceB = attachments.depthBuffer->slicePitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
468 }
469
470 if(draw->stencilBuffer)
471 {
472 data->stencilBuffer = (unsigned char *)attachments.stencilBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_STENCIL_BIT, 0, data->layer);
473 data->stencilPitchB = attachments.stencilBuffer->rowPitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
474 data->stencilSliceB = attachments.stencilBuffer->slicePitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
475 }
476 }
477
478 if(draw->fragmentPipelineLayout != draw->preRasterizationPipelineLayout)
479 {
480 vk::DescriptorSet::PrepareForSampling(draw->descriptorSetObjects, draw->fragmentPipelineLayout, device);
481 }
482 }
483
484 // Push constants
485 {
486 data->pushConstants = pushConstants;
487 }
488
489 draw->events = events;
490
491 DrawCall::run(device, draw, &drawTickets, clusterQueues);
492 }
493
setup()494 void DrawCall::setup()
495 {
496 if(occlusionQuery != nullptr)
497 {
498 occlusionQuery->start();
499 }
500
501 if(events)
502 {
503 events->add();
504 }
505 }
506
teardown(vk::Device * device)507 void DrawCall::teardown(vk::Device *device)
508 {
509 if(events)
510 {
511 events->done();
512 events = nullptr;
513 }
514
515 vertexRoutine = {};
516 setupRoutine = {};
517 pixelRoutine = {};
518
519 if(preRasterizationContainsImageWrite)
520 {
521 vk::DescriptorSet::ContentsChanged(descriptorSetObjects, preRasterizationPipelineLayout, device);
522 }
523
524 if(!data->rasterizerDiscard)
525 {
526 if(occlusionQuery != nullptr)
527 {
528 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
529 {
530 occlusionQuery->add(data->occlusion[cluster]);
531 }
532 occlusionQuery->finish();
533 }
534
535 for(auto *target : colorBuffer)
536 {
537 if(target)
538 {
539 target->contentsChanged(vk::Image::DIRECT_MEMORY_ACCESS);
540 }
541 }
542
543 // If pre-rasterization and fragment use the same pipeline, and pre-rasterization
544 // also contains image writes, don't double-notify the descriptor set.
545 const bool descSetAlreadyNotified = preRasterizationContainsImageWrite && fragmentPipelineLayout == preRasterizationPipelineLayout;
546 if(fragmentContainsImageWrite && !descSetAlreadyNotified)
547 {
548 vk::DescriptorSet::ContentsChanged(descriptorSetObjects, fragmentPipelineLayout, device);
549 }
550 }
551 }
552
run(vk::Device * device,const marl::Loan<DrawCall> & draw,marl::Ticket::Queue * tickets,marl::Ticket::Queue clusterQueues[MaxClusterCount])553 void DrawCall::run(vk::Device *device, const marl::Loan<DrawCall> &draw, marl::Ticket::Queue *tickets, marl::Ticket::Queue clusterQueues[MaxClusterCount])
554 {
555 draw->setup();
556
557 const auto numPrimitives = draw->numPrimitives;
558 const auto numPrimitivesPerBatch = draw->numPrimitivesPerBatch;
559 const auto numBatches = draw->numBatches;
560
561 auto ticket = tickets->take();
562 auto finally = marl::make_shared_finally([device, draw, ticket] {
563 MARL_SCOPED_EVENT("FINISH draw %d", draw->id);
564 draw->teardown(device);
565 ticket.done();
566 });
567
568 for(unsigned int batchId = 0; batchId < numBatches; batchId++)
569 {
570 auto batch = draw->batchDataPool->borrow();
571 batch->id = batchId;
572 batch->firstPrimitive = batch->id * numPrimitivesPerBatch;
573 batch->numPrimitives = std::min(batch->firstPrimitive + numPrimitivesPerBatch, numPrimitives) - batch->firstPrimitive;
574
575 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
576 {
577 batch->clusterTickets[cluster] = std::move(clusterQueues[cluster].take());
578 }
579
580 marl::schedule([device, draw, batch, finally] {
581 processVertices(device, draw.get(), batch.get());
582
583 if(!draw->data->rasterizerDiscard)
584 {
585 processPrimitives(device, draw.get(), batch.get());
586
587 if(batch->numVisible > 0)
588 {
589 processPixels(device, draw, batch, finally);
590 return;
591 }
592 }
593
594 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
595 {
596 batch->clusterTickets[cluster].done();
597 }
598 });
599 }
600 }
601
processVertices(vk::Device * device,DrawCall * draw,BatchData * batch)602 void DrawCall::processVertices(vk::Device *device, DrawCall *draw, BatchData *batch)
603 {
604 MARL_SCOPED_EVENT("VERTEX draw %d, batch %d", draw->id, batch->id);
605
606 unsigned int triangleIndices[MaxBatchSize + 1][3]; // One extra for SIMD width overrun. TODO: Adjust to dynamic batch size.
607 {
608 MARL_SCOPED_EVENT("processPrimitiveVertices");
609 processPrimitiveVertices(
610 triangleIndices,
611 draw->data->indices,
612 draw->indexType,
613 batch->firstPrimitive,
614 batch->numPrimitives,
615 draw->topology,
616 draw->provokingVertexMode);
617 }
618
619 auto &vertexTask = batch->vertexTask;
620 vertexTask.primitiveStart = batch->firstPrimitive;
621 // We're only using batch compaction for points, not lines
622 vertexTask.vertexCount = batch->numPrimitives * ((draw->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) ? 1 : 3);
623 if(vertexTask.vertexCache.drawCall != draw->id)
624 {
625 vertexTask.vertexCache.clear();
626 vertexTask.vertexCache.drawCall = draw->id;
627 }
628
629 draw->vertexRoutine(device, &batch->triangles.front().v0, &triangleIndices[0][0], &vertexTask, draw->data);
630 }
631
processPrimitives(vk::Device * device,DrawCall * draw,BatchData * batch)632 void DrawCall::processPrimitives(vk::Device *device, DrawCall *draw, BatchData *batch)
633 {
634 MARL_SCOPED_EVENT("PRIMITIVES draw %d batch %d", draw->id, batch->id);
635 auto triangles = &batch->triangles[0];
636 auto primitives = &batch->primitives[0];
637 batch->numVisible = draw->setupPrimitives(device, triangles, primitives, draw, batch->numPrimitives);
638 }
639
processPixels(vk::Device * device,const marl::Loan<DrawCall> & draw,const marl::Loan<BatchData> & batch,const std::shared_ptr<marl::Finally> & finally)640 void DrawCall::processPixels(vk::Device *device, const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
641 {
642 struct Data
643 {
644 Data(const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
645 : draw(draw)
646 , batch(batch)
647 , finally(finally)
648 {}
649 marl::Loan<DrawCall> draw;
650 marl::Loan<BatchData> batch;
651 std::shared_ptr<marl::Finally> finally;
652 };
653 auto data = std::make_shared<Data>(draw, batch, finally);
654 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
655 {
656 batch->clusterTickets[cluster].onCall([device, data, cluster] {
657 auto &draw = data->draw;
658 auto &batch = data->batch;
659 MARL_SCOPED_EVENT("PIXEL draw %d, batch %d, cluster %d", draw->id, batch->id, cluster);
660 draw->pixelRoutine(device, &batch->primitives.front(), batch->numVisible, cluster, MaxClusterCount, draw->data);
661 batch->clusterTickets[cluster].done();
662 });
663 }
664 }
665
synchronize()666 void Renderer::synchronize()
667 {
668 MARL_SCOPED_EVENT("synchronize");
669 auto ticket = drawTickets.take();
670 ticket.wait();
671 device->updateSamplingRoutineSnapshotCache();
672 ticket.done();
673 }
674
processPrimitiveVertices(unsigned int triangleIndicesOut[MaxBatchSize+1][3],const void * primitiveIndices,VkIndexType indexType,unsigned int start,unsigned int triangleCount,VkPrimitiveTopology topology,VkProvokingVertexModeEXT provokingVertexMode)675 void DrawCall::processPrimitiveVertices(
676 unsigned int triangleIndicesOut[MaxBatchSize + 1][3],
677 const void *primitiveIndices,
678 VkIndexType indexType,
679 unsigned int start,
680 unsigned int triangleCount,
681 VkPrimitiveTopology topology,
682 VkProvokingVertexModeEXT provokingVertexMode)
683 {
684 if(!primitiveIndices)
685 {
686 struct LinearIndex
687 {
688 unsigned int operator[](unsigned int i) { return i; }
689 };
690
691 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, LinearIndex(), start, triangleCount))
692 {
693 return;
694 }
695 }
696 else
697 {
698 switch(indexType)
699 {
700 case VK_INDEX_TYPE_UINT16:
701 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint16_t *>(primitiveIndices), start, triangleCount))
702 {
703 return;
704 }
705 break;
706 case VK_INDEX_TYPE_UINT32:
707 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint32_t *>(primitiveIndices), start, triangleCount))
708 {
709 return;
710 }
711 break;
712 break;
713 default:
714 ASSERT(false);
715 return;
716 }
717 }
718
719 // setBatchIndices() takes care of the point case, since it's different due to the compaction
720 if(topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
721 {
722 // Repeat the last index to allow for SIMD width overrun.
723 triangleIndicesOut[triangleCount][0] = triangleIndicesOut[triangleCount - 1][2];
724 triangleIndicesOut[triangleCount][1] = triangleIndicesOut[triangleCount - 1][2];
725 triangleIndicesOut[triangleCount][2] = triangleIndicesOut[triangleCount - 1][2];
726 }
727 }
728
setupSolidTriangles(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)729 int DrawCall::setupSolidTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
730 {
731 auto &state = drawCall->setupState;
732
733 int ms = state.multiSampleCount;
734 const DrawData *data = drawCall->data;
735 int visible = 0;
736
737 for(int i = 0; i < count; i++, triangles++)
738 {
739 Vertex &v0 = triangles->v0;
740 Vertex &v1 = triangles->v1;
741 Vertex &v2 = triangles->v2;
742
743 Polygon polygon(&v0.position, &v1.position, &v2.position);
744
745 if((v0.cullMask | v1.cullMask | v2.cullMask) == 0)
746 {
747 continue;
748 }
749
750 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) != Clipper::CLIP_FINITE)
751 {
752 continue;
753 }
754
755 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags;
756 if(clipFlagsOr != Clipper::CLIP_FINITE)
757 {
758 if(!Clipper::Clip(polygon, clipFlagsOr, *drawCall))
759 {
760 continue;
761 }
762 }
763
764 if(drawCall->setupRoutine(device, primitives, triangles, &polygon, data))
765 {
766 primitives += ms;
767 visible++;
768 }
769 }
770
771 return visible;
772 }
773
setupWireframeTriangles(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)774 int DrawCall::setupWireframeTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
775 {
776 auto &state = drawCall->setupState;
777
778 int ms = state.multiSampleCount;
779 int visible = 0;
780
781 for(int i = 0; i < count; i++)
782 {
783 const Vertex &v0 = triangles[i].v0;
784 const Vertex &v1 = triangles[i].v1;
785 const Vertex &v2 = triangles[i].v2;
786
787 float A = ((float)v0.projected.y - (float)v2.projected.y) * (float)v1.projected.x +
788 ((float)v2.projected.y - (float)v1.projected.y) * (float)v0.projected.x +
789 ((float)v1.projected.y - (float)v0.projected.y) * (float)v2.projected.x; // Area
790
791 int w0w1w2 = bit_cast<int>(v0.w) ^
792 bit_cast<int>(v1.w) ^
793 bit_cast<int>(v2.w);
794
795 A = w0w1w2 < 0 ? -A : A;
796
797 bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (A >= 0.0f) : (A <= 0.0f);
798
799 if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
800 {
801 if(frontFacing) continue;
802 }
803 if(state.cullMode & VK_CULL_MODE_BACK_BIT)
804 {
805 if(!frontFacing) continue;
806 }
807
808 Triangle lines[3];
809 lines[0].v0 = v0;
810 lines[0].v1 = v1;
811 lines[1].v0 = v1;
812 lines[1].v1 = v2;
813 lines[2].v0 = v2;
814 lines[2].v1 = v0;
815
816 for(int i = 0; i < 3; i++)
817 {
818 if(setupLine(device, *primitives, lines[i], *drawCall))
819 {
820 primitives += ms;
821 visible++;
822 }
823 }
824 }
825
826 return visible;
827 }
828
setupPointTriangles(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)829 int DrawCall::setupPointTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
830 {
831 auto &state = drawCall->setupState;
832
833 int ms = state.multiSampleCount;
834 int visible = 0;
835
836 for(int i = 0; i < count; i++)
837 {
838 const Vertex &v0 = triangles[i].v0;
839 const Vertex &v1 = triangles[i].v1;
840 const Vertex &v2 = triangles[i].v2;
841
842 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w +
843 (v0.x * v2.y - v0.y * v2.x) * v1.w +
844 (v2.x * v1.y - v1.x * v2.y) * v0.w;
845
846 bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0);
847 if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
848 {
849 if(frontFacing) continue;
850 }
851 if(state.cullMode & VK_CULL_MODE_BACK_BIT)
852 {
853 if(!frontFacing) continue;
854 }
855
856 Triangle points[3];
857 points[0].v0 = v0;
858 points[1].v0 = v1;
859 points[2].v0 = v2;
860
861 for(int i = 0; i < 3; i++)
862 {
863 if(setupPoint(device, *primitives, points[i], *drawCall))
864 {
865 primitives += ms;
866 visible++;
867 }
868 }
869 }
870
871 return visible;
872 }
873
setupLines(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)874 int DrawCall::setupLines(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
875 {
876 auto &state = drawCall->setupState;
877
878 int visible = 0;
879 int ms = state.multiSampleCount;
880
881 for(int i = 0; i < count; i++)
882 {
883 if(setupLine(device, *primitives, *triangles, *drawCall))
884 {
885 primitives += ms;
886 visible++;
887 }
888
889 triangles++;
890 }
891
892 return visible;
893 }
894
setupPoints(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)895 int DrawCall::setupPoints(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
896 {
897 auto &state = drawCall->setupState;
898
899 int visible = 0;
900 int ms = state.multiSampleCount;
901
902 for(int i = 0; i < count; i++)
903 {
904 if(setupPoint(device, *primitives, *triangles, *drawCall))
905 {
906 primitives += ms;
907 visible++;
908 }
909
910 triangles++;
911 }
912
913 return visible;
914 }
915
setupLine(vk::Device * device,Primitive & primitive,Triangle & triangle,const DrawCall & draw)916 bool DrawCall::setupLine(vk::Device *device, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
917 {
918 const Vertex &v0 = triangle.v0;
919 const Vertex &v1 = triangle.v1;
920
921 if((v0.cullMask | v1.cullMask) == 0)
922 {
923 return false;
924 }
925
926 const float4 &P0 = v0.position;
927 const float4 &P1 = v1.position;
928
929 if(P0.w <= 0 && P1.w <= 0)
930 {
931 return false;
932 }
933
934 const DrawData &data = *draw.data;
935 const float lineWidth = data.lineWidth;
936 const int clipFlags = draw.depthClipEnable ? Clipper::CLIP_FRUSTUM : Clipper::CLIP_SIDES;
937 constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
938
939 const float W = data.WxF * (1.0f / subPixF);
940 const float H = data.HxF * (1.0f / subPixF);
941
942 float dx = W * (P1.x / P1.w - P0.x / P0.w);
943 float dy = H * (P1.y / P1.w - P0.y / P0.w);
944
945 if(dx == 0 && dy == 0)
946 {
947 return false;
948 }
949
950 if(draw.lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT)
951 {
952 // Rectangle centered on the line segment
953
954 float4 P[4];
955
956 P[0] = P0;
957 P[1] = P1;
958 P[2] = P1;
959 P[3] = P0;
960
961 float scale = lineWidth * 0.5f / sqrt(dx * dx + dy * dy);
962
963 dx *= scale;
964 dy *= scale;
965
966 float dx0h = dx * P0.w / H;
967 float dy0w = dy * P0.w / W;
968
969 float dx1h = dx * P1.w / H;
970 float dy1w = dy * P1.w / W;
971
972 P[0].x += -dy0w;
973 P[0].y += +dx0h;
974
975 P[1].x += -dy1w;
976 P[1].y += +dx1h;
977
978 P[2].x += +dy1w;
979 P[2].y += -dx1h;
980
981 P[3].x += +dy0w;
982 P[3].y += -dx0h;
983
984 Polygon polygon(P, 4);
985
986 if(!Clipper::Clip(polygon, clipFlags, draw))
987 {
988 return false;
989 }
990
991 return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
992 }
993 else if(false) // TODO(b/80135519): Deprecate
994 {
995 // Connecting diamonds polygon
996 // This shape satisfies the diamond test convention, except for the exit rule part.
997 // Line segments with overlapping endpoints have duplicate fragments.
998 // The ideal algorithm requires half-open line rasterization (b/80135519).
999
1000 float4 P[8];
1001
1002 P[0] = P0;
1003 P[1] = P0;
1004 P[2] = P0;
1005 P[3] = P0;
1006 P[4] = P1;
1007 P[5] = P1;
1008 P[6] = P1;
1009 P[7] = P1;
1010
1011 float dx0 = lineWidth * 0.5f * P0.w / W;
1012 float dy0 = lineWidth * 0.5f * P0.w / H;
1013
1014 float dx1 = lineWidth * 0.5f * P1.w / W;
1015 float dy1 = lineWidth * 0.5f * P1.w / H;
1016
1017 P[0].x += -dx0;
1018 P[1].y += +dy0;
1019 P[2].x += +dx0;
1020 P[3].y += -dy0;
1021 P[4].x += -dx1;
1022 P[5].y += +dy1;
1023 P[6].x += +dx1;
1024 P[7].y += -dy1;
1025
1026 float4 L[6];
1027
1028 if(dx > -dy)
1029 {
1030 if(dx > dy) // Right
1031 {
1032 L[0] = P[0];
1033 L[1] = P[1];
1034 L[2] = P[5];
1035 L[3] = P[6];
1036 L[4] = P[7];
1037 L[5] = P[3];
1038 }
1039 else // Down
1040 {
1041 L[0] = P[0];
1042 L[1] = P[4];
1043 L[2] = P[5];
1044 L[3] = P[6];
1045 L[4] = P[2];
1046 L[5] = P[3];
1047 }
1048 }
1049 else
1050 {
1051 if(dx > dy) // Up
1052 {
1053 L[0] = P[0];
1054 L[1] = P[1];
1055 L[2] = P[2];
1056 L[3] = P[6];
1057 L[4] = P[7];
1058 L[5] = P[4];
1059 }
1060 else // Left
1061 {
1062 L[0] = P[1];
1063 L[1] = P[2];
1064 L[2] = P[3];
1065 L[3] = P[7];
1066 L[4] = P[4];
1067 L[5] = P[5];
1068 }
1069 }
1070
1071 Polygon polygon(L, 6);
1072
1073 if(!Clipper::Clip(polygon, clipFlags, draw))
1074 {
1075 return false;
1076 }
1077
1078 return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
1079 }
1080 else
1081 {
1082 // Parallelogram approximating Bresenham line
1083 // This algorithm does not satisfy the ideal diamond-exit rule, but does avoid the
1084 // duplicate fragment rasterization problem and satisfies all of Vulkan's minimum
1085 // requirements for Bresenham line segment rasterization.
1086
1087 float4 P[8];
1088 P[0] = P0;
1089 P[1] = P0;
1090 P[2] = P0;
1091 P[3] = P0;
1092 P[4] = P1;
1093 P[5] = P1;
1094 P[6] = P1;
1095 P[7] = P1;
1096
1097 float dx0 = lineWidth * 0.5f * P0.w / W;
1098 float dy0 = lineWidth * 0.5f * P0.w / H;
1099
1100 float dx1 = lineWidth * 0.5f * P1.w / W;
1101 float dy1 = lineWidth * 0.5f * P1.w / H;
1102
1103 P[0].x += -dx0;
1104 P[1].y += +dy0;
1105 P[2].x += +dx0;
1106 P[3].y += -dy0;
1107 P[4].x += -dx1;
1108 P[5].y += +dy1;
1109 P[6].x += +dx1;
1110 P[7].y += -dy1;
1111
1112 float4 L[4];
1113
1114 if(dx > -dy)
1115 {
1116 if(dx > dy) // Right
1117 {
1118 L[0] = P[1];
1119 L[1] = P[5];
1120 L[2] = P[7];
1121 L[3] = P[3];
1122 }
1123 else // Down
1124 {
1125 L[0] = P[0];
1126 L[1] = P[4];
1127 L[2] = P[6];
1128 L[3] = P[2];
1129 }
1130 }
1131 else
1132 {
1133 if(dx > dy) // Up
1134 {
1135 L[0] = P[0];
1136 L[1] = P[2];
1137 L[2] = P[6];
1138 L[3] = P[4];
1139 }
1140 else // Left
1141 {
1142 L[0] = P[1];
1143 L[1] = P[3];
1144 L[2] = P[7];
1145 L[3] = P[5];
1146 }
1147 }
1148
1149 Polygon polygon(L, 4);
1150
1151 if(!Clipper::Clip(polygon, clipFlags, draw))
1152 {
1153 return false;
1154 }
1155
1156 return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
1157 }
1158
1159 return false;
1160 }
1161
setupPoint(vk::Device * device,Primitive & primitive,Triangle & triangle,const DrawCall & draw)1162 bool DrawCall::setupPoint(vk::Device *device, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1163 {
1164 const Vertex &v = triangle.v0;
1165
1166 if(v.cullMask == 0)
1167 {
1168 return false;
1169 }
1170
1171 const DrawData &data = *draw.data;
1172 const int clipFlags = draw.depthClipEnable ? Clipper::CLIP_FRUSTUM : Clipper::CLIP_SIDES;
1173
1174 const float pSize = clamp(v.pointSize, 1.0f, static_cast<float>(vk::MAX_POINT_SIZE));
1175 const float X = pSize * v.position.w * data.halfPixelX;
1176 const float Y = pSize * v.position.w * data.halfPixelY;
1177
1178 float4 P[4];
1179
1180 P[0] = v.position;
1181 P[0].x -= X;
1182 P[0].y += Y;
1183
1184 P[1] = v.position;
1185 P[1].x += X;
1186 P[1].y += Y;
1187
1188 P[2] = v.position;
1189 P[2].x += X;
1190 P[2].y -= Y;
1191
1192 P[3] = v.position;
1193 P[3].x -= X;
1194 P[3].y -= Y;
1195
1196 Polygon polygon(P, 4);
1197
1198 if(!Clipper::Clip(polygon, clipFlags, draw))
1199 {
1200 return false;
1201 }
1202
1203 primitive.pointSizeInv = 1.0f / pSize;
1204
1205 return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
1206 }
1207
// Registers 'query' as the renderer's occlusion query.
// Asserts that the query's type is VK_QUERY_TYPE_OCCLUSION and that no occlusion query is
// currently registered.
void Renderer::addQuery(vk::Query *query)
{
	ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
	ASSERT(!occlusionQuery);

	occlusionQuery = query;
}
1215
// Unregisters the renderer's occlusion query by resetting it to nullptr.
// Asserts that the query's type is VK_QUERY_TYPE_OCCLUSION and that 'query' is the one
// currently registered.
void Renderer::removeQuery(vk::Query *query)
{
	ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
	ASSERT(occlusionQuery == query);

	occlusionQuery = nullptr;
}
1223
1224 } // namespace sw
1225