1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Renderer.hpp"
16
17 #include "Clipper.hpp"
18 #include "Polygon.hpp"
19 #include "Primitive.hpp"
20 #include "Vertex.hpp"
21 #include "Pipeline/Constants.hpp"
22 #include "Pipeline/SpirvShader.hpp"
23 #include "Reactor/Reactor.hpp"
24 #include "System/Debug.hpp"
25 #include "System/Half.hpp"
26 #include "System/Math.hpp"
27 #include "System/Memory.hpp"
28 #include "System/Timer.hpp"
29 #include "Vulkan/VkConfig.hpp"
30 #include "Vulkan/VkDescriptorSet.hpp"
31 #include "Vulkan/VkDevice.hpp"
32 #include "Vulkan/VkFence.hpp"
33 #include "Vulkan/VkImageView.hpp"
34 #include "Vulkan/VkPipelineLayout.hpp"
35 #include "Vulkan/VkQueryPool.hpp"
36
37 #include "marl/containers.h"
38 #include "marl/defer.h"
39 #include "marl/trace.h"
40
41 #undef max
42
// Globals that only exist in debug builds (NDEBUG not defined).
// NOTE(review): neither variable is referenced in the visible part of this file;
// presumably they are debugging knobs bounding which primitives get processed -
// confirm against their users before relying on that.
#ifndef NDEBUG
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1 << 21;
#endif
47
48 namespace sw {
49
50 template<typename T>
setBatchIndices(unsigned int batch[128][3],VkPrimitiveTopology topology,VkProvokingVertexModeEXT provokingVertexMode,T indices,unsigned int start,unsigned int triangleCount)51 inline bool setBatchIndices(unsigned int batch[128][3], VkPrimitiveTopology topology, VkProvokingVertexModeEXT provokingVertexMode, T indices, unsigned int start, unsigned int triangleCount)
52 {
53 bool provokeFirst = (provokingVertexMode == VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT);
54
55 switch(topology)
56 {
57 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
58 {
59 auto index = start;
60 auto pointBatch = &(batch[0][0]);
61 for(unsigned int i = 0; i < triangleCount; i++)
62 {
63 *pointBatch++ = indices[index++];
64 }
65
66 // Repeat the last index to allow for SIMD width overrun.
67 index--;
68 for(unsigned int i = 0; i < 3; i++)
69 {
70 *pointBatch++ = indices[index];
71 }
72 }
73 break;
74 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
75 {
76 auto index = 2 * start;
77 for(unsigned int i = 0; i < triangleCount; i++)
78 {
79 batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
80 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
81 batch[i][2] = indices[index + 1];
82
83 index += 2;
84 }
85 }
86 break;
87 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
88 {
89 auto index = start;
90 for(unsigned int i = 0; i < triangleCount; i++)
91 {
92 batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
93 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
94 batch[i][2] = indices[index + 1];
95
96 index += 1;
97 }
98 }
99 break;
100 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
101 {
102 auto index = 3 * start;
103 for(unsigned int i = 0; i < triangleCount; i++)
104 {
105 batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
106 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
107 batch[i][2] = indices[index + (provokeFirst ? 2 : 1)];
108
109 index += 3;
110 }
111 }
112 break;
113 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
114 {
115 auto index = start;
116 for(unsigned int i = 0; i < triangleCount; i++)
117 {
118 batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
119 batch[i][1] = indices[index + ((start + i) & 1) + (provokeFirst ? 1 : 0)];
120 batch[i][2] = indices[index + (~(start + i) & 1) + (provokeFirst ? 1 : 0)];
121
122 index += 1;
123 }
124 }
125 break;
126 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
127 {
128 auto index = start + 1;
129 for(unsigned int i = 0; i < triangleCount; i++)
130 {
131 batch[i][provokeFirst ? 0 : 2] = indices[index + 0];
132 batch[i][provokeFirst ? 1 : 0] = indices[index + 1];
133 batch[i][provokeFirst ? 2 : 1] = indices[0];
134
135 index += 1;
136 }
137 }
138 break;
139 default:
140 ASSERT(false);
141 return false;
142 }
143
144 return true;
145 }
146
DrawCall()147 DrawCall::DrawCall()
148 {
149 // TODO(b/140991626): Use allocateUninitialized() instead of allocateZeroOrPoison() to improve startup peformance.
150 data = (DrawData *)sw::allocateZeroOrPoison(sizeof(DrawData));
151 }
152
~DrawCall()153 DrawCall::~DrawCall()
154 {
155 sw::freeMemory(data);
156 }
157
Renderer(vk::Device * device)158 Renderer::Renderer(vk::Device *device)
159 : device(device)
160 {
161 vertexProcessor.setRoutineCacheSize(1024);
162 pixelProcessor.setRoutineCacheSize(1024);
163 setupProcessor.setRoutineCacheSize(1024);
164 }
165
~Renderer()166 Renderer::~Renderer()
167 {
168 drawTickets.take().wait();
169 }
170
171 // Renderer objects have to be mem aligned to the alignment provided in the class declaration
operator new(size_t size)172 void *Renderer::operator new(size_t size)
173 {
174 ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
175 return vk::allocateHostMemory(sizeof(Renderer), alignof(Renderer), vk::NULL_ALLOCATION_CALLBACKS, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
176 }
177
operator delete(void * mem)178 void Renderer::operator delete(void *mem)
179 {
180 vk::freeHostMemory(mem, vk::NULL_ALLOCATION_CALLBACKS);
181 }
182
draw(const vk::GraphicsPipeline * pipeline,const vk::DynamicState & dynamicState,unsigned int count,int baseVertex,CountedEvent * events,int instanceID,int layer,void * indexBuffer,const VkRect2D & renderArea,vk::Pipeline::PushConstantStorage const & pushConstants,bool update)183 void Renderer::draw(const vk::GraphicsPipeline *pipeline, const vk::DynamicState &dynamicState, unsigned int count, int baseVertex,
184 CountedEvent *events, int instanceID, int layer, void *indexBuffer, const VkRect2D &renderArea,
185 vk::Pipeline::PushConstantStorage const &pushConstants, bool update)
186 {
187 if(count == 0) { return; }
188
189 auto id = nextDrawID++;
190 MARL_SCOPED_EVENT("draw %d", id);
191
192 marl::Pool<sw::DrawCall>::Loan draw;
193 {
194 MARL_SCOPED_EVENT("drawCallPool.borrow()");
195 draw = drawCallPool.borrow();
196 }
197 draw->id = id;
198
199 const vk::GraphicsState &pipelineState = pipeline->getState(dynamicState);
200 pixelProcessor.setBlendConstant(pipelineState.getBlendConstants());
201
202 const vk::Inputs &inputs = pipeline->getInputs();
203
204 if(update)
205 {
206 MARL_SCOPED_EVENT("update");
207
208 const sw::SpirvShader *fragmentShader = pipeline->getShader(VK_SHADER_STAGE_FRAGMENT_BIT).get();
209 const sw::SpirvShader *vertexShader = pipeline->getShader(VK_SHADER_STAGE_VERTEX_BIT).get();
210
211 const vk::Attachments attachments = pipeline->getAttachments();
212
213 vertexState = vertexProcessor.update(pipelineState, vertexShader, inputs);
214 setupState = setupProcessor.update(pipelineState, fragmentShader, vertexShader, attachments);
215 pixelState = pixelProcessor.update(pipelineState, fragmentShader, vertexShader, attachments, hasOcclusionQuery());
216
217 vertexRoutine = vertexProcessor.routine(vertexState, pipelineState.getPipelineLayout(), vertexShader, inputs.getDescriptorSets());
218 setupRoutine = setupProcessor.routine(setupState);
219 pixelRoutine = pixelProcessor.routine(pixelState, pipelineState.getPipelineLayout(), fragmentShader, inputs.getDescriptorSets());
220 }
221
222 draw->containsImageWrite = pipeline->containsImageWrite();
223
224 DrawCall::SetupFunction setupPrimitives = nullptr;
225 int ms = pipelineState.getSampleCount();
226 unsigned int numPrimitivesPerBatch = MaxBatchSize / ms;
227
228 if(pipelineState.isDrawTriangle(false))
229 {
230 switch(pipelineState.getPolygonMode())
231 {
232 case VK_POLYGON_MODE_FILL:
233 setupPrimitives = &DrawCall::setupSolidTriangles;
234 break;
235 case VK_POLYGON_MODE_LINE:
236 setupPrimitives = &DrawCall::setupWireframeTriangles;
237 numPrimitivesPerBatch /= 3;
238 break;
239 case VK_POLYGON_MODE_POINT:
240 setupPrimitives = &DrawCall::setupPointTriangles;
241 numPrimitivesPerBatch /= 3;
242 break;
243 default:
244 UNSUPPORTED("polygon mode: %d", int(pipelineState.getPolygonMode()));
245 return;
246 }
247 }
248 else if(pipelineState.isDrawLine(false))
249 {
250 setupPrimitives = &DrawCall::setupLines;
251 }
252 else // Point primitive topology
253 {
254 setupPrimitives = &DrawCall::setupPoints;
255 }
256
257 DrawData *data = draw->data;
258 draw->occlusionQuery = occlusionQuery;
259 draw->batchDataPool = &batchDataPool;
260 draw->numPrimitives = count;
261 draw->numPrimitivesPerBatch = numPrimitivesPerBatch;
262 draw->numBatches = (count + draw->numPrimitivesPerBatch - 1) / draw->numPrimitivesPerBatch;
263 draw->topology = pipelineState.getTopology();
264 draw->provokingVertexMode = pipelineState.getProvokingVertexMode();
265 draw->indexType = pipeline->getIndexBuffer().getIndexType();
266 draw->lineRasterizationMode = pipelineState.getLineRasterizationMode();
267 draw->descriptorSetObjects = inputs.getDescriptorSetObjects();
268 draw->pipelineLayout = pipelineState.getPipelineLayout();
269 draw->depthClipEnable = pipelineState.getDepthClipEnable();
270
271 draw->vertexRoutine = vertexRoutine;
272 draw->setupRoutine = setupRoutine;
273 draw->pixelRoutine = pixelRoutine;
274 draw->setupPrimitives = setupPrimitives;
275 draw->setupState = setupState;
276
277 data->descriptorSets = inputs.getDescriptorSets();
278 data->descriptorDynamicOffsets = inputs.getDescriptorDynamicOffsets();
279
280 for(int i = 0; i < MAX_INTERFACE_COMPONENTS / 4; i++)
281 {
282 const sw::Stream &stream = inputs.getStream(i);
283 data->input[i] = stream.buffer;
284 data->robustnessSize[i] = stream.robustnessSize;
285 data->stride[i] = inputs.getVertexStride(i, pipelineState.hasDynamicVertexStride());
286 }
287
288 data->indices = indexBuffer;
289 data->layer = layer;
290 data->instanceID = instanceID;
291 data->baseVertex = baseVertex;
292
293 if(pixelState.stencilActive)
294 {
295 data->stencil[0].set(pipelineState.getFrontStencil().reference, pipelineState.getFrontStencil().compareMask, pipelineState.getFrontStencil().writeMask);
296 data->stencil[1].set(pipelineState.getBackStencil().reference, pipelineState.getBackStencil().compareMask, pipelineState.getBackStencil().writeMask);
297 }
298
299 data->lineWidth = pipelineState.getLineWidth();
300
301 data->factor = pixelProcessor.factor;
302
303 if(pixelState.alphaToCoverage)
304 {
305 if(ms == 4)
306 {
307 data->a2c0 = float4(0.2f);
308 data->a2c1 = float4(0.4f);
309 data->a2c2 = float4(0.6f);
310 data->a2c3 = float4(0.8f);
311 }
312 else if(ms == 2)
313 {
314 data->a2c0 = float4(0.25f);
315 data->a2c1 = float4(0.75f);
316 }
317 else if(ms == 1)
318 {
319 data->a2c0 = float4(0.5f);
320 }
321 else
322 ASSERT(false);
323 }
324
325 if(pixelState.occlusionEnabled)
326 {
327 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
328 {
329 data->occlusion[cluster] = 0;
330 }
331 }
332
333 // Viewport
334 {
335 const VkViewport &viewport = pipelineState.getViewport();
336
337 float W = 0.5f * viewport.width;
338 float H = 0.5f * viewport.height;
339 float X0 = viewport.x + W;
340 float Y0 = viewport.y + H;
341 float N = viewport.minDepth;
342 float F = viewport.maxDepth;
343 float Z = F - N;
344 constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
345
346 data->WxF = float4(W * subPixF);
347 data->HxF = float4(H * subPixF);
348 data->X0xF = float4(X0 * subPixF - subPixF / 2);
349 data->Y0xF = float4(Y0 * subPixF - subPixF / 2);
350 data->halfPixelX = float4(0.5f / W);
351 data->halfPixelY = float4(0.5f / H);
352 data->viewportHeight = abs(viewport.height);
353 data->depthRange = Z;
354 data->depthNear = N;
355 data->constantDepthBias = pipelineState.getConstantDepthBias();
356 data->slopeDepthBias = pipelineState.getSlopeDepthBias();
357 data->depthBiasClamp = pipelineState.getDepthBiasClamp();
358 data->depthClipEnable = pipelineState.getDepthClipEnable();
359
360 const vk::Attachments attachments = pipeline->getAttachments();
361 if(attachments.depthBuffer)
362 {
363 switch(attachments.depthBuffer->getFormat(VK_IMAGE_ASPECT_DEPTH_BIT))
364 {
365 case VK_FORMAT_D16_UNORM:
366 data->minimumResolvableDepthDifference = 1.0f / 0xFFFF;
367 break;
368 case VK_FORMAT_D32_SFLOAT:
369 // The minimum resolvable depth difference is determined per-polygon for floating-point depth
370 // buffers. DrawData::minimumResolvableDepthDifference is unused.
371 break;
372 default:
373 UNSUPPORTED("Depth format: %d", int(attachments.depthBuffer->getFormat(VK_IMAGE_ASPECT_DEPTH_BIT)));
374 }
375 }
376 }
377
378 // Target
379 {
380 const vk::Attachments attachments = pipeline->getAttachments();
381
382 for(int index = 0; index < MAX_COLOR_BUFFERS; index++)
383 {
384 draw->colorBuffer[index] = attachments.colorBuffer[index];
385
386 if(draw->colorBuffer[index])
387 {
388 data->colorBuffer[index] = (unsigned int *)attachments.colorBuffer[index]->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_COLOR_BIT, 0, data->layer);
389 data->colorPitchB[index] = attachments.colorBuffer[index]->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
390 data->colorSliceB[index] = attachments.colorBuffer[index]->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
391 }
392 }
393
394 draw->depthBuffer = attachments.depthBuffer;
395 draw->stencilBuffer = attachments.stencilBuffer;
396
397 if(draw->depthBuffer)
398 {
399 data->depthBuffer = (float *)attachments.depthBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_DEPTH_BIT, 0, data->layer);
400 data->depthPitchB = attachments.depthBuffer->rowPitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
401 data->depthSliceB = attachments.depthBuffer->slicePitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
402 }
403
404 if(draw->stencilBuffer)
405 {
406 data->stencilBuffer = (unsigned char *)attachments.stencilBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_STENCIL_BIT, 0, data->layer);
407 data->stencilPitchB = attachments.stencilBuffer->rowPitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
408 data->stencilSliceB = attachments.stencilBuffer->slicePitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
409 }
410 }
411
412 // Scissor
413 {
414 const VkRect2D &scissor = pipelineState.getScissor();
415
416 int x0 = renderArea.offset.x;
417 int y0 = renderArea.offset.y;
418 int x1 = x0 + renderArea.extent.width;
419 int y1 = y0 + renderArea.extent.height;
420 data->scissorX0 = clamp<int>(scissor.offset.x, x0, x1);
421 data->scissorX1 = clamp<int>(scissor.offset.x + scissor.extent.width, x0, x1);
422 data->scissorY0 = clamp<int>(scissor.offset.y, y0, y1);
423 data->scissorY1 = clamp<int>(scissor.offset.y + scissor.extent.height, y0, y1);
424 }
425
426 // Push constants
427 {
428 data->pushConstants = pushConstants;
429 }
430
431 draw->events = events;
432
433 vk::DescriptorSet::PrepareForSampling(draw->descriptorSetObjects, draw->pipelineLayout, device);
434
435 DrawCall::run(device, draw, &drawTickets, clusterQueues);
436 }
437
setup()438 void DrawCall::setup()
439 {
440 if(occlusionQuery != nullptr)
441 {
442 occlusionQuery->start();
443 }
444
445 if(events)
446 {
447 events->add();
448 }
449 }
450
teardown(vk::Device * device)451 void DrawCall::teardown(vk::Device *device)
452 {
453 if(events)
454 {
455 events->done();
456 events = nullptr;
457 }
458
459 if(occlusionQuery != nullptr)
460 {
461 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
462 {
463 occlusionQuery->add(data->occlusion[cluster]);
464 }
465 occlusionQuery->finish();
466 }
467
468 vertexRoutine = {};
469 setupRoutine = {};
470 pixelRoutine = {};
471
472 for(auto *target : colorBuffer)
473 {
474 if(target)
475 {
476 target->contentsChanged(vk::Image::DIRECT_MEMORY_ACCESS);
477 }
478 }
479
480 if(containsImageWrite)
481 {
482 vk::DescriptorSet::ContentsChanged(descriptorSetObjects, pipelineLayout, device);
483 }
484 }
485
run(vk::Device * device,const marl::Loan<DrawCall> & draw,marl::Ticket::Queue * tickets,marl::Ticket::Queue clusterQueues[MaxClusterCount])486 void DrawCall::run(vk::Device *device, const marl::Loan<DrawCall> &draw, marl::Ticket::Queue *tickets, marl::Ticket::Queue clusterQueues[MaxClusterCount])
487 {
488 draw->setup();
489
490 auto const numPrimitives = draw->numPrimitives;
491 auto const numPrimitivesPerBatch = draw->numPrimitivesPerBatch;
492 auto const numBatches = draw->numBatches;
493
494 auto ticket = tickets->take();
495 auto finally = marl::make_shared_finally([device, draw, ticket] {
496 MARL_SCOPED_EVENT("FINISH draw %d", draw->id);
497 draw->teardown(device);
498 ticket.done();
499 });
500
501 for(unsigned int batchId = 0; batchId < numBatches; batchId++)
502 {
503 auto batch = draw->batchDataPool->borrow();
504 batch->id = batchId;
505 batch->firstPrimitive = batch->id * numPrimitivesPerBatch;
506 batch->numPrimitives = std::min(batch->firstPrimitive + numPrimitivesPerBatch, numPrimitives) - batch->firstPrimitive;
507
508 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
509 {
510 batch->clusterTickets[cluster] = std::move(clusterQueues[cluster].take());
511 }
512
513 marl::schedule([device, draw, batch, finally] {
514 processVertices(device, draw.get(), batch.get());
515
516 if(!draw->setupState.rasterizerDiscard)
517 {
518 processPrimitives(device, draw.get(), batch.get());
519
520 if(batch->numVisible > 0)
521 {
522 processPixels(device, draw, batch, finally);
523 return;
524 }
525 }
526
527 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
528 {
529 batch->clusterTickets[cluster].done();
530 }
531 });
532 }
533 }
534
processVertices(vk::Device * device,DrawCall * draw,BatchData * batch)535 void DrawCall::processVertices(vk::Device *device, DrawCall *draw, BatchData *batch)
536 {
537 MARL_SCOPED_EVENT("VERTEX draw %d, batch %d", draw->id, batch->id);
538
539 unsigned int triangleIndices[MaxBatchSize + 1][3]; // One extra for SIMD width overrun. TODO: Adjust to dynamic batch size.
540 {
541 MARL_SCOPED_EVENT("processPrimitiveVertices");
542 processPrimitiveVertices(
543 triangleIndices,
544 draw->data->indices,
545 draw->indexType,
546 batch->firstPrimitive,
547 batch->numPrimitives,
548 draw->topology,
549 draw->provokingVertexMode);
550 }
551
552 auto &vertexTask = batch->vertexTask;
553 vertexTask.primitiveStart = batch->firstPrimitive;
554 // We're only using batch compaction for points, not lines
555 vertexTask.vertexCount = batch->numPrimitives * ((draw->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) ? 1 : 3);
556 if(vertexTask.vertexCache.drawCall != draw->id)
557 {
558 vertexTask.vertexCache.clear();
559 vertexTask.vertexCache.drawCall = draw->id;
560 }
561
562 draw->vertexRoutine(device, &batch->triangles.front().v0, &triangleIndices[0][0], &vertexTask, draw->data);
563 }
564
processPrimitives(vk::Device * device,DrawCall * draw,BatchData * batch)565 void DrawCall::processPrimitives(vk::Device *device, DrawCall *draw, BatchData *batch)
566 {
567 MARL_SCOPED_EVENT("PRIMITIVES draw %d batch %d", draw->id, batch->id);
568 auto triangles = &batch->triangles[0];
569 auto primitives = &batch->primitives[0];
570 batch->numVisible = draw->setupPrimitives(device, triangles, primitives, draw, batch->numPrimitives);
571 }
572
processPixels(vk::Device * device,const marl::Loan<DrawCall> & draw,const marl::Loan<BatchData> & batch,const std::shared_ptr<marl::Finally> & finally)573 void DrawCall::processPixels(vk::Device *device, const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
574 {
575 struct Data
576 {
577 Data(const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
578 : draw(draw)
579 , batch(batch)
580 , finally(finally)
581 {}
582 marl::Loan<DrawCall> draw;
583 marl::Loan<BatchData> batch;
584 std::shared_ptr<marl::Finally> finally;
585 };
586 auto data = std::make_shared<Data>(draw, batch, finally);
587 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
588 {
589 batch->clusterTickets[cluster].onCall([device, data, cluster] {
590 auto &draw = data->draw;
591 auto &batch = data->batch;
592 MARL_SCOPED_EVENT("PIXEL draw %d, batch %d, cluster %d", draw->id, batch->id, cluster);
593 draw->pixelRoutine(device, &batch->primitives.front(), batch->numVisible, cluster, MaxClusterCount, draw->data);
594 batch->clusterTickets[cluster].done();
595 });
596 }
597 }
598
synchronize()599 void Renderer::synchronize()
600 {
601 MARL_SCOPED_EVENT("synchronize");
602 auto ticket = drawTickets.take();
603 ticket.wait();
604 device->updateSamplingRoutineSnapshotCache();
605 ticket.done();
606 }
607
processPrimitiveVertices(unsigned int triangleIndicesOut[MaxBatchSize+1][3],const void * primitiveIndices,VkIndexType indexType,unsigned int start,unsigned int triangleCount,VkPrimitiveTopology topology,VkProvokingVertexModeEXT provokingVertexMode)608 void DrawCall::processPrimitiveVertices(
609 unsigned int triangleIndicesOut[MaxBatchSize + 1][3],
610 const void *primitiveIndices,
611 VkIndexType indexType,
612 unsigned int start,
613 unsigned int triangleCount,
614 VkPrimitiveTopology topology,
615 VkProvokingVertexModeEXT provokingVertexMode)
616 {
617 if(!primitiveIndices)
618 {
619 struct LinearIndex
620 {
621 unsigned int operator[](unsigned int i) { return i; }
622 };
623
624 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, LinearIndex(), start, triangleCount))
625 {
626 return;
627 }
628 }
629 else
630 {
631 switch(indexType)
632 {
633 case VK_INDEX_TYPE_UINT16:
634 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint16_t *>(primitiveIndices), start, triangleCount))
635 {
636 return;
637 }
638 break;
639 case VK_INDEX_TYPE_UINT32:
640 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint32_t *>(primitiveIndices), start, triangleCount))
641 {
642 return;
643 }
644 break;
645 break;
646 default:
647 ASSERT(false);
648 return;
649 }
650 }
651
652 // setBatchIndices() takes care of the point case, since it's different due to the compaction
653 if(topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
654 {
655 // Repeat the last index to allow for SIMD width overrun.
656 triangleIndicesOut[triangleCount][0] = triangleIndicesOut[triangleCount - 1][2];
657 triangleIndicesOut[triangleCount][1] = triangleIndicesOut[triangleCount - 1][2];
658 triangleIndicesOut[triangleCount][2] = triangleIndicesOut[triangleCount - 1][2];
659 }
660 }
661
setupSolidTriangles(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)662 int DrawCall::setupSolidTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
663 {
664 auto &state = drawCall->setupState;
665
666 int ms = state.multiSampleCount;
667 const DrawData *data = drawCall->data;
668 int visible = 0;
669
670 for(int i = 0; i < count; i++, triangles++)
671 {
672 Vertex &v0 = triangles->v0;
673 Vertex &v1 = triangles->v1;
674 Vertex &v2 = triangles->v2;
675
676 Polygon polygon(&v0.position, &v1.position, &v2.position);
677
678 if((v0.cullMask | v1.cullMask | v2.cullMask) == 0)
679 {
680 continue;
681 }
682
683 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) != Clipper::CLIP_FINITE)
684 {
685 continue;
686 }
687
688 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags;
689 if(clipFlagsOr != Clipper::CLIP_FINITE)
690 {
691 if(!Clipper::Clip(polygon, clipFlagsOr, *drawCall))
692 {
693 continue;
694 }
695 }
696
697 if(drawCall->setupRoutine(device, primitives, triangles, &polygon, data))
698 {
699 primitives += ms;
700 visible++;
701 }
702 }
703
704 return visible;
705 }
706
setupWireframeTriangles(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)707 int DrawCall::setupWireframeTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
708 {
709 auto &state = drawCall->setupState;
710
711 int ms = state.multiSampleCount;
712 int visible = 0;
713
714 for(int i = 0; i < count; i++)
715 {
716 const Vertex &v0 = triangles[i].v0;
717 const Vertex &v1 = triangles[i].v1;
718 const Vertex &v2 = triangles[i].v2;
719
720 float A = ((float)v0.projected.y - (float)v2.projected.y) * (float)v1.projected.x +
721 ((float)v2.projected.y - (float)v1.projected.y) * (float)v0.projected.x +
722 ((float)v1.projected.y - (float)v0.projected.y) * (float)v2.projected.x; // Area
723
724 int w0w1w2 = bit_cast<int>(v0.w) ^
725 bit_cast<int>(v1.w) ^
726 bit_cast<int>(v2.w);
727
728 A = w0w1w2 < 0 ? -A : A;
729
730 bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (A >= 0.0f) : (A <= 0.0f);
731
732 if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
733 {
734 if(frontFacing) continue;
735 }
736 if(state.cullMode & VK_CULL_MODE_BACK_BIT)
737 {
738 if(!frontFacing) continue;
739 }
740
741 Triangle lines[3];
742 lines[0].v0 = v0;
743 lines[0].v1 = v1;
744 lines[1].v0 = v1;
745 lines[1].v1 = v2;
746 lines[2].v0 = v2;
747 lines[2].v1 = v0;
748
749 for(int i = 0; i < 3; i++)
750 {
751 if(setupLine(device, *primitives, lines[i], *drawCall))
752 {
753 primitives += ms;
754 visible++;
755 }
756 }
757 }
758
759 return visible;
760 }
761
setupPointTriangles(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)762 int DrawCall::setupPointTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
763 {
764 auto &state = drawCall->setupState;
765
766 int ms = state.multiSampleCount;
767 int visible = 0;
768
769 for(int i = 0; i < count; i++)
770 {
771 const Vertex &v0 = triangles[i].v0;
772 const Vertex &v1 = triangles[i].v1;
773 const Vertex &v2 = triangles[i].v2;
774
775 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w +
776 (v0.x * v2.y - v0.y * v2.x) * v1.w +
777 (v2.x * v1.y - v1.x * v2.y) * v0.w;
778
779 bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0);
780 if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
781 {
782 if(frontFacing) continue;
783 }
784 if(state.cullMode & VK_CULL_MODE_BACK_BIT)
785 {
786 if(!frontFacing) continue;
787 }
788
789 Triangle points[3];
790 points[0].v0 = v0;
791 points[1].v0 = v1;
792 points[2].v0 = v2;
793
794 for(int i = 0; i < 3; i++)
795 {
796 if(setupPoint(device, *primitives, points[i], *drawCall))
797 {
798 primitives += ms;
799 visible++;
800 }
801 }
802 }
803
804 return visible;
805 }
806
setupLines(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)807 int DrawCall::setupLines(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
808 {
809 auto &state = drawCall->setupState;
810
811 int visible = 0;
812 int ms = state.multiSampleCount;
813
814 for(int i = 0; i < count; i++)
815 {
816 if(setupLine(device, *primitives, *triangles, *drawCall))
817 {
818 primitives += ms;
819 visible++;
820 }
821
822 triangles++;
823 }
824
825 return visible;
826 }
827
setupPoints(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)828 int DrawCall::setupPoints(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
829 {
830 auto &state = drawCall->setupState;
831
832 int visible = 0;
833 int ms = state.multiSampleCount;
834
835 for(int i = 0; i < count; i++)
836 {
837 if(setupPoint(device, *primitives, *triangles, *drawCall))
838 {
839 primitives += ms;
840 visible++;
841 }
842
843 triangles++;
844 }
845
846 return visible;
847 }
848
setupLine(vk::Device * device,Primitive & primitive,Triangle & triangle,const DrawCall & draw)849 bool DrawCall::setupLine(vk::Device *device, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
850 {
851 const DrawData &data = *draw.data;
852
853 float lineWidth = data.lineWidth;
854
855 Vertex &v0 = triangle.v0;
856 Vertex &v1 = triangle.v1;
857
858 if((v0.cullMask | v1.cullMask) == 0)
859 {
860 return false;
861 }
862
863 const float4 &P0 = v0.position;
864 const float4 &P1 = v1.position;
865
866 if(P0.w <= 0 && P1.w <= 0)
867 {
868 return false;
869 }
870
871 constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
872
873 const float W = data.WxF[0] * (1.0f / subPixF);
874 const float H = data.HxF[0] * (1.0f / subPixF);
875
876 float dx = W * (P1.x / P1.w - P0.x / P0.w);
877 float dy = H * (P1.y / P1.w - P0.y / P0.w);
878
879 if(dx == 0 && dy == 0)
880 {
881 return false;
882 }
883
884 if(draw.lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT)
885 {
886 // Rectangle centered on the line segment
887
888 float4 P[4];
889 int C[4];
890
891 P[0] = P0;
892 P[1] = P1;
893 P[2] = P1;
894 P[3] = P0;
895
896 float scale = lineWidth * 0.5f / sqrt(dx * dx + dy * dy);
897
898 dx *= scale;
899 dy *= scale;
900
901 float dx0h = dx * P0.w / H;
902 float dy0w = dy * P0.w / W;
903
904 float dx1h = dx * P1.w / H;
905 float dy1w = dy * P1.w / W;
906
907 P[0].x += -dy0w;
908 P[0].y += +dx0h;
909 C[0] = Clipper::ComputeClipFlags(P[0], draw.depthClipEnable);
910
911 P[1].x += -dy1w;
912 P[1].y += +dx1h;
913 C[1] = Clipper::ComputeClipFlags(P[1], draw.depthClipEnable);
914
915 P[2].x += +dy1w;
916 P[2].y += -dx1h;
917 C[2] = Clipper::ComputeClipFlags(P[2], draw.depthClipEnable);
918
919 P[3].x += +dy0w;
920 P[3].y += -dx0h;
921 C[3] = Clipper::ComputeClipFlags(P[3], draw.depthClipEnable);
922
923 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
924 {
925 Polygon polygon(P, 4);
926
927 int clipFlagsOr = C[0] | C[1] | C[2] | C[3];
928
929 if(clipFlagsOr != Clipper::CLIP_FINITE)
930 {
931 if(!Clipper::Clip(polygon, clipFlagsOr, draw))
932 {
933 return false;
934 }
935 }
936
937 return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
938 }
939 }
940 else if(false) // TODO(b/80135519): Deprecate
941 {
942 // Connecting diamonds polygon
943 // This shape satisfies the diamond test convention, except for the exit rule part.
944 // Line segments with overlapping endpoints have duplicate fragments.
945 // The ideal algorithm requires half-open line rasterization (b/80135519).
946
947 float4 P[8];
948 int C[8];
949
950 P[0] = P0;
951 P[1] = P0;
952 P[2] = P0;
953 P[3] = P0;
954 P[4] = P1;
955 P[5] = P1;
956 P[6] = P1;
957 P[7] = P1;
958
959 float dx0 = lineWidth * 0.5f * P0.w / W;
960 float dy0 = lineWidth * 0.5f * P0.w / H;
961
962 float dx1 = lineWidth * 0.5f * P1.w / W;
963 float dy1 = lineWidth * 0.5f * P1.w / H;
964
965 P[0].x += -dx0;
966 C[0] = Clipper::ComputeClipFlags(P[0], draw.depthClipEnable);
967
968 P[1].y += +dy0;
969 C[1] = Clipper::ComputeClipFlags(P[1], draw.depthClipEnable);
970
971 P[2].x += +dx0;
972 C[2] = Clipper::ComputeClipFlags(P[2], draw.depthClipEnable);
973
974 P[3].y += -dy0;
975 C[3] = Clipper::ComputeClipFlags(P[3], draw.depthClipEnable);
976
977 P[4].x += -dx1;
978 C[4] = Clipper::ComputeClipFlags(P[4], draw.depthClipEnable);
979
980 P[5].y += +dy1;
981 C[5] = Clipper::ComputeClipFlags(P[5], draw.depthClipEnable);
982
983 P[6].x += +dx1;
984 C[6] = Clipper::ComputeClipFlags(P[6], draw.depthClipEnable);
985
986 P[7].y += -dy1;
987 C[7] = Clipper::ComputeClipFlags(P[7], draw.depthClipEnable);
988
989 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
990 {
991 float4 L[6];
992
993 if(dx > -dy)
994 {
995 if(dx > dy) // Right
996 {
997 L[0] = P[0];
998 L[1] = P[1];
999 L[2] = P[5];
1000 L[3] = P[6];
1001 L[4] = P[7];
1002 L[5] = P[3];
1003 }
1004 else // Down
1005 {
1006 L[0] = P[0];
1007 L[1] = P[4];
1008 L[2] = P[5];
1009 L[3] = P[6];
1010 L[4] = P[2];
1011 L[5] = P[3];
1012 }
1013 }
1014 else
1015 {
1016 if(dx > dy) // Up
1017 {
1018 L[0] = P[0];
1019 L[1] = P[1];
1020 L[2] = P[2];
1021 L[3] = P[6];
1022 L[4] = P[7];
1023 L[5] = P[4];
1024 }
1025 else // Left
1026 {
1027 L[0] = P[1];
1028 L[1] = P[2];
1029 L[2] = P[3];
1030 L[3] = P[7];
1031 L[4] = P[4];
1032 L[5] = P[5];
1033 }
1034 }
1035
1036 Polygon polygon(L, 6);
1037
1038 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7];
1039
1040 if(clipFlagsOr != Clipper::CLIP_FINITE)
1041 {
1042 if(!Clipper::Clip(polygon, clipFlagsOr, draw))
1043 {
1044 return false;
1045 }
1046 }
1047
1048 return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
1049 }
1050 }
1051 else
1052 {
1053 // Parallelogram approximating Bresenham line
1054 // This algorithm does not satisfy the ideal diamond-exit rule, but does avoid the
1055 // duplicate fragment rasterization problem and satisfies all of Vulkan's minimum
1056 // requirements for Bresenham line segment rasterization.
1057
1058 float4 P[8];
1059 P[0] = P0;
1060 P[1] = P0;
1061 P[2] = P0;
1062 P[3] = P0;
1063 P[4] = P1;
1064 P[5] = P1;
1065 P[6] = P1;
1066 P[7] = P1;
1067
1068 float dx0 = lineWidth * 0.5f * P0.w / W;
1069 float dy0 = lineWidth * 0.5f * P0.w / H;
1070
1071 float dx1 = lineWidth * 0.5f * P1.w / W;
1072 float dy1 = lineWidth * 0.5f * P1.w / H;
1073
1074 P[0].x += -dx0;
1075 P[1].y += +dy0;
1076 P[2].x += +dx0;
1077 P[3].y += -dy0;
1078 P[4].x += -dx1;
1079 P[5].y += +dy1;
1080 P[6].x += +dx1;
1081 P[7].y += -dy1;
1082
1083 float4 L[4];
1084
1085 if(dx > -dy)
1086 {
1087 if(dx > dy) // Right
1088 {
1089 L[0] = P[1];
1090 L[1] = P[5];
1091 L[2] = P[7];
1092 L[3] = P[3];
1093 }
1094 else // Down
1095 {
1096 L[0] = P[0];
1097 L[1] = P[4];
1098 L[2] = P[6];
1099 L[3] = P[2];
1100 }
1101 }
1102 else
1103 {
1104 if(dx > dy) // Up
1105 {
1106 L[0] = P[0];
1107 L[1] = P[2];
1108 L[2] = P[6];
1109 L[3] = P[4];
1110 }
1111 else // Left
1112 {
1113 L[0] = P[1];
1114 L[1] = P[3];
1115 L[2] = P[7];
1116 L[3] = P[5];
1117 }
1118 }
1119
1120 int C0 = Clipper::ComputeClipFlags(L[0], draw.depthClipEnable);
1121 int C1 = Clipper::ComputeClipFlags(L[1], draw.depthClipEnable);
1122 int C2 = Clipper::ComputeClipFlags(L[2], draw.depthClipEnable);
1123 int C3 = Clipper::ComputeClipFlags(L[3], draw.depthClipEnable);
1124
1125 if((C0 & C1 & C2 & C3) == Clipper::CLIP_FINITE)
1126 {
1127 Polygon polygon(L, 4);
1128
1129 int clipFlagsOr = C0 | C1 | C2 | C3;
1130
1131 if(clipFlagsOr != Clipper::CLIP_FINITE)
1132 {
1133 if(!Clipper::Clip(polygon, clipFlagsOr, draw))
1134 {
1135 return false;
1136 }
1137 }
1138
1139 return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
1140 }
1141 }
1142
1143 return false;
1144 }
1145
setupPoint(vk::Device * device,Primitive & primitive,Triangle & triangle,const DrawCall & draw)1146 bool DrawCall::setupPoint(vk::Device *device, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1147 {
1148 const DrawData &data = *draw.data;
1149
1150 Vertex &v = triangle.v0;
1151
1152 if(v.cullMask == 0)
1153 {
1154 return false;
1155 }
1156
1157 float pSize = v.pointSize;
1158
1159 pSize = clamp(pSize, 1.0f, static_cast<float>(vk::MAX_POINT_SIZE));
1160
1161 float4 P[4];
1162 int C[4];
1163
1164 P[0] = v.position;
1165 P[1] = v.position;
1166 P[2] = v.position;
1167 P[3] = v.position;
1168
1169 const float X = pSize * P[0].w * data.halfPixelX[0];
1170 const float Y = pSize * P[0].w * data.halfPixelY[0];
1171
1172 P[0].x -= X;
1173 P[0].y += Y;
1174 C[0] = Clipper::ComputeClipFlags(P[0], draw.depthClipEnable);
1175
1176 P[1].x += X;
1177 P[1].y += Y;
1178 C[1] = Clipper::ComputeClipFlags(P[1], draw.depthClipEnable);
1179
1180 P[2].x += X;
1181 P[2].y -= Y;
1182 C[2] = Clipper::ComputeClipFlags(P[2], draw.depthClipEnable);
1183
1184 P[3].x -= X;
1185 P[3].y -= Y;
1186 C[3] = Clipper::ComputeClipFlags(P[3], draw.depthClipEnable);
1187
1188 Polygon polygon(P, 4);
1189
1190 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1191 {
1192 int clipFlagsOr = C[0] | C[1] | C[2] | C[3];
1193
1194 if(clipFlagsOr != Clipper::CLIP_FINITE)
1195 {
1196 if(!Clipper::Clip(polygon, clipFlagsOr, draw))
1197 {
1198 return false;
1199 }
1200 }
1201
1202 primitive.pointSizeInv = 1.0f / pSize;
1203
1204 return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
1205 }
1206
1207 return false;
1208 }
1209
// Registers 'query' as the renderer's current occlusion query by storing the pointer
// in occlusionQuery.
// Asserts that the query is of type VK_QUERY_TYPE_OCCLUSION and that no occlusion
// query is currently registered, i.e. at most one can be attached at a time.
void Renderer::addQuery(vk::Query *query)
{
	ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
	ASSERT(!occlusionQuery);

	occlusionQuery = query;
}
1217
// Unregisters the renderer's current occlusion query by resetting occlusionQuery to
// nullptr.
// Asserts that 'query' is of type VK_QUERY_TYPE_OCCLUSION and that it is the same
// query that is currently registered.
void Renderer::removeQuery(vk::Query *query)
{
	ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
	ASSERT(occlusionQuery == query);

	occlusionQuery = nullptr;
}
1225
1226 } // namespace sw
1227