1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Renderer.hpp"
16
17 #include "Clipper.hpp"
18 #include "Polygon.hpp"
19 #include "Primitive.hpp"
20 #include "Vertex.hpp"
21 #include "Pipeline/Constants.hpp"
22 #include "Pipeline/SpirvShader.hpp"
23 #include "Reactor/Reactor.hpp"
24 #include "System/Debug.hpp"
25 #include "System/Half.hpp"
26 #include "System/Math.hpp"
27 #include "System/Memory.hpp"
28 #include "System/Timer.hpp"
29 #include "Vulkan/VkConfig.h"
30 #include "Vulkan/VkDevice.hpp"
31 #include "Vulkan/VkFence.hpp"
32 #include "Vulkan/VkImageView.hpp"
33 #include "Vulkan/VkQueryPool.hpp"
34
35 #include "marl/containers.h"
36 #include "marl/defer.h"
37 #include "marl/trace.h"
38
39 #undef max
40
#ifndef NDEBUG
// Debug-only (compiled out when NDEBUG is defined) bounds on the number of
// primitives in a draw: 1 .. 2^21. These match the range that
// Renderer::draw() accepts in debug builds.
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1 << 21;
#endif
45
46 namespace sw {
47
48 template<typename T>
setBatchIndices(unsigned int batch[128][3],VkPrimitiveTopology topology,VkProvokingVertexModeEXT provokingVertexMode,T indices,unsigned int start,unsigned int triangleCount)49 inline bool setBatchIndices(unsigned int batch[128][3], VkPrimitiveTopology topology, VkProvokingVertexModeEXT provokingVertexMode, T indices, unsigned int start, unsigned int triangleCount)
50 {
51 bool provokeFirst = (provokingVertexMode == VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT);
52
53 switch(topology)
54 {
55 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
56 {
57 auto index = start;
58 auto pointBatch = &(batch[0][0]);
59 for(unsigned int i = 0; i < triangleCount; i++)
60 {
61 *pointBatch++ = indices[index++];
62 }
63
64 // Repeat the last index to allow for SIMD width overrun.
65 index--;
66 for(unsigned int i = 0; i < 3; i++)
67 {
68 *pointBatch++ = indices[index];
69 }
70 break;
71 }
72 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
73 {
74 auto index = 2 * start;
75 for(unsigned int i = 0; i < triangleCount; i++)
76 {
77 batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
78 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
79 batch[i][2] = indices[index + 1];
80
81 index += 2;
82 }
83 break;
84 }
85 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
86 {
87 auto index = start;
88 for(unsigned int i = 0; i < triangleCount; i++)
89 {
90 batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
91 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
92 batch[i][2] = indices[index + 1];
93
94 index += 1;
95 }
96 break;
97 }
98 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
99 {
100 auto index = 3 * start;
101 for(unsigned int i = 0; i < triangleCount; i++)
102 {
103 batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
104 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
105 batch[i][2] = indices[index + (provokeFirst ? 2 : 1)];
106
107 index += 3;
108 }
109 break;
110 }
111 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
112 {
113 auto index = start;
114 for(unsigned int i = 0; i < triangleCount; i++)
115 {
116 batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
117 batch[i][1] = indices[index + ((start + i) & 1) + (provokeFirst ? 1 : 0)];
118 batch[i][2] = indices[index + (~(start + i) & 1) + (provokeFirst ? 1 : 0)];
119
120 index += 1;
121 }
122 break;
123 }
124 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
125 {
126 auto index = start + 1;
127 for(unsigned int i = 0; i < triangleCount; i++)
128 {
129 batch[i][provokeFirst ? 0 : 2] = indices[index + 0];
130 batch[i][provokeFirst ? 1 : 0] = indices[index + 1];
131 batch[i][provokeFirst ? 2 : 1] = indices[0];
132
133 index += 1;
134 }
135 break;
136 }
137 default:
138 ASSERT(false);
139 return false;
140 }
141
142 return true;
143 }
144
DrawCall()145 DrawCall::DrawCall()
146 {
147 data = (DrawData *)allocate(sizeof(DrawData));
148 data->constants = &constants;
149 }
150
// Releases the DrawData storage obtained in the constructor.
DrawCall::~DrawCall()
{
	deallocate(data);
}
155
Renderer(vk::Device * device)156 Renderer::Renderer(vk::Device *device)
157 : device(device)
158 {
159 VertexProcessor::setRoutineCacheSize(1024);
160 PixelProcessor::setRoutineCacheSize(1024);
161 SetupProcessor::setRoutineCacheSize(1024);
162 }
163
~Renderer()164 Renderer::~Renderer()
165 {
166 drawTickets.take().wait();
167 }
168
169 // Renderer objects have to be mem aligned to the alignment provided in the class declaration
operator new(size_t size)170 void *Renderer::operator new(size_t size)
171 {
172 ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
173 return vk::allocate(sizeof(Renderer), alignof(Renderer), vk::DEVICE_MEMORY, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
174 }
175
// Counterpart of Renderer::operator new: returns the storage via vk::deallocate().
void Renderer::operator delete(void *mem)
{
	vk::deallocate(mem, vk::DEVICE_MEMORY);
}
180
draw(const sw::Context * context,VkIndexType indexType,unsigned int count,int baseVertex,TaskEvents * events,int instanceID,int viewID,void * indexBuffer,const VkExtent3D & framebufferExtent,PushConstantStorage const & pushConstants,bool update)181 void Renderer::draw(const sw::Context *context, VkIndexType indexType, unsigned int count, int baseVertex,
182 TaskEvents *events, int instanceID, int viewID, void *indexBuffer, const VkExtent3D &framebufferExtent,
183 PushConstantStorage const &pushConstants, bool update)
184 {
185 if(count == 0) { return; }
186
187 auto id = nextDrawID++;
188 MARL_SCOPED_EVENT("draw %d", id);
189
190 #ifndef NDEBUG
191 {
192 unsigned int minPrimitives = 1;
193 unsigned int maxPrimitives = 1 << 21;
194 if(count < minPrimitives || count > maxPrimitives)
195 {
196 return;
197 }
198 }
199 #endif
200
201 int ms = context->sampleCount;
202
203 if(!context->multiSampleMask)
204 {
205 return;
206 }
207
208 marl::Pool<sw::DrawCall>::Loan draw;
209 {
210 MARL_SCOPED_EVENT("drawCallPool.borrow()");
211 draw = drawCallPool.borrow();
212 }
213 draw->id = id;
214
215 if(update)
216 {
217 MARL_SCOPED_EVENT("update");
218 vertexState = VertexProcessor::update(context);
219 setupState = SetupProcessor::update(context);
220 pixelState = PixelProcessor::update(context);
221
222 vertexRoutine = VertexProcessor::routine(vertexState, context->pipelineLayout, context->vertexShader, context->descriptorSets);
223 setupRoutine = SetupProcessor::routine(setupState);
224 pixelRoutine = PixelProcessor::routine(pixelState, context->pipelineLayout, context->pixelShader, context->descriptorSets);
225 }
226
227 DrawCall::SetupFunction setupPrimitives = nullptr;
228 unsigned int numPrimitivesPerBatch = MaxBatchSize / ms;
229
230 if(context->isDrawTriangle(false))
231 {
232 switch(context->polygonMode)
233 {
234 case VK_POLYGON_MODE_FILL:
235 setupPrimitives = &DrawCall::setupSolidTriangles;
236 break;
237 case VK_POLYGON_MODE_LINE:
238 setupPrimitives = &DrawCall::setupWireframeTriangles;
239 numPrimitivesPerBatch /= 3;
240 break;
241 case VK_POLYGON_MODE_POINT:
242 setupPrimitives = &DrawCall::setupPointTriangles;
243 numPrimitivesPerBatch /= 3;
244 break;
245 default:
246 UNSUPPORTED("polygon mode: %d", int(context->polygonMode));
247 return;
248 }
249 }
250 else if(context->isDrawLine(false))
251 {
252 setupPrimitives = &DrawCall::setupLines;
253 }
254 else // Point primitive topology
255 {
256 setupPrimitives = &DrawCall::setupPoints;
257 }
258
259 DrawData *data = draw->data;
260 draw->occlusionQuery = occlusionQuery;
261 draw->batchDataPool = &batchDataPool;
262 draw->numPrimitives = count;
263 draw->numPrimitivesPerBatch = numPrimitivesPerBatch;
264 draw->numBatches = (count + draw->numPrimitivesPerBatch - 1) / draw->numPrimitivesPerBatch;
265 draw->topology = context->topology;
266 draw->provokingVertexMode = context->provokingVertexMode;
267 draw->indexType = indexType;
268 draw->lineRasterizationMode = context->lineRasterizationMode;
269
270 draw->vertexRoutine = vertexRoutine;
271 draw->setupRoutine = setupRoutine;
272 draw->pixelRoutine = pixelRoutine;
273 draw->setupPrimitives = setupPrimitives;
274 draw->setupState = setupState;
275
276 data->descriptorSets = context->descriptorSets;
277 data->descriptorDynamicOffsets = context->descriptorDynamicOffsets;
278
279 for(int i = 0; i < MAX_INTERFACE_COMPONENTS / 4; i++)
280 {
281 data->input[i] = context->input[i].buffer;
282 data->robustnessSize[i] = context->input[i].robustnessSize;
283 data->stride[i] = context->input[i].vertexStride;
284 }
285
286 data->indices = indexBuffer;
287 data->viewID = viewID;
288 data->instanceID = instanceID;
289 data->baseVertex = baseVertex;
290
291 if(pixelState.stencilActive)
292 {
293 data->stencil[0].set(context->frontStencil.reference, context->frontStencil.compareMask, context->frontStencil.writeMask);
294 data->stencil[1].set(context->backStencil.reference, context->backStencil.compareMask, context->backStencil.writeMask);
295 }
296
297 data->lineWidth = context->lineWidth;
298
299 data->factor = factor;
300
301 if(pixelState.alphaToCoverage)
302 {
303 if(ms == 4)
304 {
305 data->a2c0 = float4(0.2f);
306 data->a2c1 = float4(0.4f);
307 data->a2c2 = float4(0.6f);
308 data->a2c3 = float4(0.8f);
309 }
310 else if(ms == 2)
311 {
312 data->a2c0 = float4(0.25f);
313 data->a2c1 = float4(0.75f);
314 }
315 else
316 ASSERT(false);
317 }
318
319 if(pixelState.occlusionEnabled)
320 {
321 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
322 {
323 data->occlusion[cluster] = 0;
324 }
325 }
326
327 // Viewport
328 {
329 float W = 0.5f * viewport.width;
330 float H = 0.5f * viewport.height;
331 float X0 = viewport.x + W;
332 float Y0 = viewport.y + H;
333 float N = viewport.minDepth;
334 float F = viewport.maxDepth;
335 float Z = F - N;
336 constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
337
338 if(context->isDrawTriangle(false))
339 {
340 N += context->depthBias;
341 }
342
343 data->WxF = float4(W * subPixF);
344 data->HxF = float4(H * subPixF);
345 data->X0xF = float4(X0 * subPixF - subPixF / 2);
346 data->Y0xF = float4(Y0 * subPixF - subPixF / 2);
347 data->halfPixelX = float4(0.5f / W);
348 data->halfPixelY = float4(0.5f / H);
349 data->viewportHeight = abs(viewport.height);
350 data->slopeDepthBias = context->slopeDepthBias;
351 data->depthRange = Z;
352 data->depthNear = N;
353 }
354
355 // Target
356 {
357 for(int index = 0; index < RENDERTARGETS; index++)
358 {
359 draw->renderTarget[index] = context->renderTarget[index];
360
361 if(draw->renderTarget[index])
362 {
363 data->colorBuffer[index] = (unsigned int *)context->renderTarget[index]->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_COLOR_BIT, 0, data->viewID);
364 data->colorPitchB[index] = context->renderTarget[index]->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
365 data->colorSliceB[index] = context->renderTarget[index]->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
366 }
367 }
368
369 draw->depthBuffer = context->depthBuffer;
370 draw->stencilBuffer = context->stencilBuffer;
371
372 if(draw->depthBuffer)
373 {
374 data->depthBuffer = (float *)context->depthBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_DEPTH_BIT, 0, data->viewID);
375 data->depthPitchB = context->depthBuffer->rowPitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
376 data->depthSliceB = context->depthBuffer->slicePitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
377 }
378
379 if(draw->stencilBuffer)
380 {
381 data->stencilBuffer = (unsigned char *)context->stencilBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_STENCIL_BIT, 0, data->viewID);
382 data->stencilPitchB = context->stencilBuffer->rowPitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
383 data->stencilSliceB = context->stencilBuffer->slicePitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
384 }
385 }
386
387 // Scissor
388 {
389 data->scissorX0 = clamp<int>(scissor.offset.x, 0, framebufferExtent.width);
390 data->scissorX1 = clamp<int>(scissor.offset.x + scissor.extent.width, 0, framebufferExtent.width);
391 data->scissorY0 = clamp<int>(scissor.offset.y, 0, framebufferExtent.height);
392 data->scissorY1 = clamp<int>(scissor.offset.y + scissor.extent.height, 0, framebufferExtent.height);
393 }
394
395 // Push constants
396 {
397 data->pushConstants = pushConstants;
398 }
399
400 draw->events = events;
401
402 DrawCall::run(draw, &drawTickets, clusterQueues);
403 }
404
setup()405 void DrawCall::setup()
406 {
407 if(occlusionQuery != nullptr)
408 {
409 occlusionQuery->start();
410 }
411
412 if(events)
413 {
414 events->start();
415 }
416 }
417
teardown()418 void DrawCall::teardown()
419 {
420 if(events)
421 {
422 events->finish();
423 events = nullptr;
424 }
425
426 if(occlusionQuery != nullptr)
427 {
428 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
429 {
430 occlusionQuery->add(data->occlusion[cluster]);
431 }
432 occlusionQuery->finish();
433 }
434
435 vertexRoutine = {};
436 setupRoutine = {};
437 pixelRoutine = {};
438 }
439
run(const marl::Loan<DrawCall> & draw,marl::Ticket::Queue * tickets,marl::Ticket::Queue clusterQueues[MaxClusterCount])440 void DrawCall::run(const marl::Loan<DrawCall> &draw, marl::Ticket::Queue *tickets, marl::Ticket::Queue clusterQueues[MaxClusterCount])
441 {
442 draw->setup();
443
444 auto const numPrimitives = draw->numPrimitives;
445 auto const numPrimitivesPerBatch = draw->numPrimitivesPerBatch;
446 auto const numBatches = draw->numBatches;
447
448 auto ticket = tickets->take();
449 auto finally = marl::make_shared_finally([draw, ticket] {
450 MARL_SCOPED_EVENT("FINISH draw %d", draw->id);
451 draw->teardown();
452 ticket.done();
453 });
454
455 for(unsigned int batchId = 0; batchId < numBatches; batchId++)
456 {
457 auto batch = draw->batchDataPool->borrow();
458 batch->id = batchId;
459 batch->firstPrimitive = batch->id * numPrimitivesPerBatch;
460 batch->numPrimitives = std::min(batch->firstPrimitive + numPrimitivesPerBatch, numPrimitives) - batch->firstPrimitive;
461
462 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
463 {
464 batch->clusterTickets[cluster] = std::move(clusterQueues[cluster].take());
465 }
466
467 marl::schedule([draw, batch, finally] {
468 processVertices(draw.get(), batch.get());
469
470 if(!draw->setupState.rasterizerDiscard)
471 {
472 processPrimitives(draw.get(), batch.get());
473
474 if(batch->numVisible > 0)
475 {
476 processPixels(draw, batch, finally);
477 return;
478 }
479 }
480
481 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
482 {
483 batch->clusterTickets[cluster].done();
484 }
485 });
486 }
487 }
488
processVertices(DrawCall * draw,BatchData * batch)489 void DrawCall::processVertices(DrawCall *draw, BatchData *batch)
490 {
491 MARL_SCOPED_EVENT("VERTEX draw %d, batch %d", draw->id, batch->id);
492
493 unsigned int triangleIndices[MaxBatchSize + 1][3]; // One extra for SIMD width overrun. TODO: Adjust to dynamic batch size.
494 {
495 MARL_SCOPED_EVENT("processPrimitiveVertices");
496 processPrimitiveVertices(
497 triangleIndices,
498 draw->data->indices,
499 draw->indexType,
500 batch->firstPrimitive,
501 batch->numPrimitives,
502 draw->topology,
503 draw->provokingVertexMode);
504 }
505
506 auto &vertexTask = batch->vertexTask;
507 vertexTask.primitiveStart = batch->firstPrimitive;
508 // We're only using batch compaction for points, not lines
509 vertexTask.vertexCount = batch->numPrimitives * ((draw->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) ? 1 : 3);
510 if(vertexTask.vertexCache.drawCall != draw->id)
511 {
512 vertexTask.vertexCache.clear();
513 vertexTask.vertexCache.drawCall = draw->id;
514 }
515
516 draw->vertexRoutine(&batch->triangles.front().v0, &triangleIndices[0][0], &vertexTask, draw->data);
517 }
518
processPrimitives(DrawCall * draw,BatchData * batch)519 void DrawCall::processPrimitives(DrawCall *draw, BatchData *batch)
520 {
521 MARL_SCOPED_EVENT("PRIMITIVES draw %d batch %d", draw->id, batch->id);
522 auto triangles = &batch->triangles[0];
523 auto primitives = &batch->primitives[0];
524 batch->numVisible = draw->setupPrimitives(triangles, primitives, draw, batch->numPrimitives);
525 }
526
processPixels(const marl::Loan<DrawCall> & draw,const marl::Loan<BatchData> & batch,const std::shared_ptr<marl::Finally> & finally)527 void DrawCall::processPixels(const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
528 {
529 struct Data
530 {
531 Data(const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
532 : draw(draw)
533 , batch(batch)
534 , finally(finally)
535 {}
536 marl::Loan<DrawCall> draw;
537 marl::Loan<BatchData> batch;
538 std::shared_ptr<marl::Finally> finally;
539 };
540 auto data = std::make_shared<Data>(draw, batch, finally);
541 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
542 {
543 batch->clusterTickets[cluster].onCall([data, cluster] {
544 auto &draw = data->draw;
545 auto &batch = data->batch;
546 MARL_SCOPED_EVENT("PIXEL draw %d, batch %d, cluster %d", draw->id, batch->id, cluster);
547 draw->pixelRoutine(&batch->primitives.front(), batch->numVisible, cluster, MaxClusterCount, draw->data);
548 batch->clusterTickets[cluster].done();
549 });
550 }
551 }
552
synchronize()553 void Renderer::synchronize()
554 {
555 MARL_SCOPED_EVENT("synchronize");
556 auto ticket = drawTickets.take();
557 ticket.wait();
558 device->updateSamplingRoutineConstCache();
559 ticket.done();
560 }
561
processPrimitiveVertices(unsigned int triangleIndicesOut[MaxBatchSize+1][3],const void * primitiveIndices,VkIndexType indexType,unsigned int start,unsigned int triangleCount,VkPrimitiveTopology topology,VkProvokingVertexModeEXT provokingVertexMode)562 void DrawCall::processPrimitiveVertices(
563 unsigned int triangleIndicesOut[MaxBatchSize + 1][3],
564 const void *primitiveIndices,
565 VkIndexType indexType,
566 unsigned int start,
567 unsigned int triangleCount,
568 VkPrimitiveTopology topology,
569 VkProvokingVertexModeEXT provokingVertexMode)
570 {
571 if(!primitiveIndices)
572 {
573 struct LinearIndex
574 {
575 unsigned int operator[](unsigned int i) { return i; }
576 };
577
578 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, LinearIndex(), start, triangleCount))
579 {
580 return;
581 }
582 }
583 else
584 {
585 switch(indexType)
586 {
587 case VK_INDEX_TYPE_UINT16:
588 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint16_t *>(primitiveIndices), start, triangleCount))
589 {
590 return;
591 }
592 break;
593 case VK_INDEX_TYPE_UINT32:
594 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint32_t *>(primitiveIndices), start, triangleCount))
595 {
596 return;
597 }
598 break;
599 break;
600 default:
601 ASSERT(false);
602 return;
603 }
604 }
605
606 // setBatchIndices() takes care of the point case, since it's different due to the compaction
607 if(topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
608 {
609 // Repeat the last index to allow for SIMD width overrun.
610 triangleIndicesOut[triangleCount][0] = triangleIndicesOut[triangleCount - 1][2];
611 triangleIndicesOut[triangleCount][1] = triangleIndicesOut[triangleCount - 1][2];
612 triangleIndicesOut[triangleCount][2] = triangleIndicesOut[triangleCount - 1][2];
613 }
614 }
615
setupSolidTriangles(Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)616 int DrawCall::setupSolidTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
617 {
618 auto &state = drawCall->setupState;
619
620 int ms = state.multiSampleCount;
621 const DrawData *data = drawCall->data;
622 int visible = 0;
623
624 for(int i = 0; i < count; i++, triangles++)
625 {
626 Vertex &v0 = triangles->v0;
627 Vertex &v1 = triangles->v1;
628 Vertex &v2 = triangles->v2;
629
630 Polygon polygon(&v0.position, &v1.position, &v2.position);
631
632 if((v0.cullMask | v1.cullMask | v2.cullMask) == 0)
633 {
634 continue;
635 }
636
637 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) != Clipper::CLIP_FINITE)
638 {
639 continue;
640 }
641
642 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags;
643 if(clipFlagsOr != Clipper::CLIP_FINITE)
644 {
645 if(!Clipper::Clip(polygon, clipFlagsOr, *drawCall))
646 {
647 continue;
648 }
649 }
650
651 if(drawCall->setupRoutine(primitives, triangles, &polygon, data))
652 {
653 primitives += ms;
654 visible++;
655 }
656 }
657
658 return visible;
659 }
660
setupWireframeTriangles(Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)661 int DrawCall::setupWireframeTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
662 {
663 auto &state = drawCall->setupState;
664
665 int ms = state.multiSampleCount;
666 int visible = 0;
667
668 for(int i = 0; i < count; i++)
669 {
670 const Vertex &v0 = triangles[i].v0;
671 const Vertex &v1 = triangles[i].v1;
672 const Vertex &v2 = triangles[i].v2;
673
674 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w +
675 (v0.x * v2.y - v0.y * v2.x) * v1.w +
676 (v2.x * v1.y - v1.x * v2.y) * v0.w;
677
678 bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0);
679 if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
680 {
681 if(frontFacing) continue;
682 }
683 if(state.cullMode & VK_CULL_MODE_BACK_BIT)
684 {
685 if(!frontFacing) continue;
686 }
687
688 Triangle lines[3];
689 lines[0].v0 = v0;
690 lines[0].v1 = v1;
691 lines[1].v0 = v1;
692 lines[1].v1 = v2;
693 lines[2].v0 = v2;
694 lines[2].v1 = v0;
695
696 for(int i = 0; i < 3; i++)
697 {
698 if(setupLine(*primitives, lines[i], *drawCall))
699 {
700 primitives += ms;
701 visible++;
702 }
703 }
704 }
705
706 return visible;
707 }
708
setupPointTriangles(Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)709 int DrawCall::setupPointTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
710 {
711 auto &state = drawCall->setupState;
712
713 int ms = state.multiSampleCount;
714 int visible = 0;
715
716 for(int i = 0; i < count; i++)
717 {
718 const Vertex &v0 = triangles[i].v0;
719 const Vertex &v1 = triangles[i].v1;
720 const Vertex &v2 = triangles[i].v2;
721
722 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w +
723 (v0.x * v2.y - v0.y * v2.x) * v1.w +
724 (v2.x * v1.y - v1.x * v2.y) * v0.w;
725
726 bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0);
727 if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
728 {
729 if(frontFacing) continue;
730 }
731 if(state.cullMode & VK_CULL_MODE_BACK_BIT)
732 {
733 if(!frontFacing) continue;
734 }
735
736 Triangle points[3];
737 points[0].v0 = v0;
738 points[1].v0 = v1;
739 points[2].v0 = v2;
740
741 for(int i = 0; i < 3; i++)
742 {
743 if(setupPoint(*primitives, points[i], *drawCall))
744 {
745 primitives += ms;
746 visible++;
747 }
748 }
749 }
750
751 return visible;
752 }
753
setupLines(Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)754 int DrawCall::setupLines(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
755 {
756 auto &state = drawCall->setupState;
757
758 int visible = 0;
759 int ms = state.multiSampleCount;
760
761 for(int i = 0; i < count; i++)
762 {
763 if(setupLine(*primitives, *triangles, *drawCall))
764 {
765 primitives += ms;
766 visible++;
767 }
768
769 triangles++;
770 }
771
772 return visible;
773 }
774
setupPoints(Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)775 int DrawCall::setupPoints(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
776 {
777 auto &state = drawCall->setupState;
778
779 int visible = 0;
780 int ms = state.multiSampleCount;
781
782 for(int i = 0; i < count; i++)
783 {
784 if(setupPoint(*primitives, *triangles, *drawCall))
785 {
786 primitives += ms;
787 visible++;
788 }
789
790 triangles++;
791 }
792
793 return visible;
794 }
795
setupLine(Primitive & primitive,Triangle & triangle,const DrawCall & draw)796 bool DrawCall::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
797 {
798 const DrawData &data = *draw.data;
799
800 float lineWidth = data.lineWidth;
801
802 Vertex &v0 = triangle.v0;
803 Vertex &v1 = triangle.v1;
804
805 if((v0.cullMask | v1.cullMask) == 0)
806 {
807 return false;
808 }
809
810 const float4 &P0 = v0.position;
811 const float4 &P1 = v1.position;
812
813 if(P0.w <= 0 && P1.w <= 0)
814 {
815 return false;
816 }
817
818 constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
819
820 const float W = data.WxF[0] * (1.0f / subPixF);
821 const float H = data.HxF[0] * (1.0f / subPixF);
822
823 float dx = W * (P1.x / P1.w - P0.x / P0.w);
824 float dy = H * (P1.y / P1.w - P0.y / P0.w);
825
826 if(dx == 0 && dy == 0)
827 {
828 return false;
829 }
830
831 if(draw.lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT)
832 {
833 // Rectangle centered on the line segment
834
835 float4 P[4];
836 int C[4];
837
838 P[0] = P0;
839 P[1] = P1;
840 P[2] = P1;
841 P[3] = P0;
842
843 float scale = lineWidth * 0.5f / sqrt(dx * dx + dy * dy);
844
845 dx *= scale;
846 dy *= scale;
847
848 float dx0h = dx * P0.w / H;
849 float dy0w = dy * P0.w / W;
850
851 float dx1h = dx * P1.w / H;
852 float dy1w = dy * P1.w / W;
853
854 P[0].x += -dy0w;
855 P[0].y += +dx0h;
856 C[0] = Clipper::ComputeClipFlags(P[0]);
857
858 P[1].x += -dy1w;
859 P[1].y += +dx1h;
860 C[1] = Clipper::ComputeClipFlags(P[1]);
861
862 P[2].x += +dy1w;
863 P[2].y += -dx1h;
864 C[2] = Clipper::ComputeClipFlags(P[2]);
865
866 P[3].x += +dy0w;
867 P[3].y += -dx0h;
868 C[3] = Clipper::ComputeClipFlags(P[3]);
869
870 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
871 {
872 Polygon polygon(P, 4);
873
874 int clipFlagsOr = C[0] | C[1] | C[2] | C[3];
875
876 if(clipFlagsOr != Clipper::CLIP_FINITE)
877 {
878 if(!Clipper::Clip(polygon, clipFlagsOr, draw))
879 {
880 return false;
881 }
882 }
883
884 return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
885 }
886 }
887 else if(false) // TODO(b/80135519): Deprecate
888 {
889 // Connecting diamonds polygon
890 // This shape satisfies the diamond test convention, except for the exit rule part.
891 // Line segments with overlapping endpoints have duplicate fragments.
892 // The ideal algorithm requires half-open line rasterization (b/80135519).
893
894 float4 P[8];
895 int C[8];
896
897 P[0] = P0;
898 P[1] = P0;
899 P[2] = P0;
900 P[3] = P0;
901 P[4] = P1;
902 P[5] = P1;
903 P[6] = P1;
904 P[7] = P1;
905
906 float dx0 = lineWidth * 0.5f * P0.w / W;
907 float dy0 = lineWidth * 0.5f * P0.w / H;
908
909 float dx1 = lineWidth * 0.5f * P1.w / W;
910 float dy1 = lineWidth * 0.5f * P1.w / H;
911
912 P[0].x += -dx0;
913 C[0] = Clipper::ComputeClipFlags(P[0]);
914
915 P[1].y += +dy0;
916 C[1] = Clipper::ComputeClipFlags(P[1]);
917
918 P[2].x += +dx0;
919 C[2] = Clipper::ComputeClipFlags(P[2]);
920
921 P[3].y += -dy0;
922 C[3] = Clipper::ComputeClipFlags(P[3]);
923
924 P[4].x += -dx1;
925 C[4] = Clipper::ComputeClipFlags(P[4]);
926
927 P[5].y += +dy1;
928 C[5] = Clipper::ComputeClipFlags(P[5]);
929
930 P[6].x += +dx1;
931 C[6] = Clipper::ComputeClipFlags(P[6]);
932
933 P[7].y += -dy1;
934 C[7] = Clipper::ComputeClipFlags(P[7]);
935
936 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
937 {
938 float4 L[6];
939
940 if(dx > -dy)
941 {
942 if(dx > dy) // Right
943 {
944 L[0] = P[0];
945 L[1] = P[1];
946 L[2] = P[5];
947 L[3] = P[6];
948 L[4] = P[7];
949 L[5] = P[3];
950 }
951 else // Down
952 {
953 L[0] = P[0];
954 L[1] = P[4];
955 L[2] = P[5];
956 L[3] = P[6];
957 L[4] = P[2];
958 L[5] = P[3];
959 }
960 }
961 else
962 {
963 if(dx > dy) // Up
964 {
965 L[0] = P[0];
966 L[1] = P[1];
967 L[2] = P[2];
968 L[3] = P[6];
969 L[4] = P[7];
970 L[5] = P[4];
971 }
972 else // Left
973 {
974 L[0] = P[1];
975 L[1] = P[2];
976 L[2] = P[3];
977 L[3] = P[7];
978 L[4] = P[4];
979 L[5] = P[5];
980 }
981 }
982
983 Polygon polygon(L, 6);
984
985 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7];
986
987 if(clipFlagsOr != Clipper::CLIP_FINITE)
988 {
989 if(!Clipper::Clip(polygon, clipFlagsOr, draw))
990 {
991 return false;
992 }
993 }
994
995 return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
996 }
997 }
998 else
999 {
1000 // Parallelogram approximating Bresenham line
1001 // This algorithm does not satisfy the ideal diamond-exit rule, but does avoid the
1002 // duplicate fragment rasterization problem and satisfies all of Vulkan's minimum
1003 // requirements for Bresenham line segment rasterization.
1004
1005 float4 P[8];
1006 P[0] = P0;
1007 P[1] = P0;
1008 P[2] = P0;
1009 P[3] = P0;
1010 P[4] = P1;
1011 P[5] = P1;
1012 P[6] = P1;
1013 P[7] = P1;
1014
1015 float dx0 = lineWidth * 0.5f * P0.w / W;
1016 float dy0 = lineWidth * 0.5f * P0.w / H;
1017
1018 float dx1 = lineWidth * 0.5f * P1.w / W;
1019 float dy1 = lineWidth * 0.5f * P1.w / H;
1020
1021 P[0].x += -dx0;
1022 P[1].y += +dy0;
1023 P[2].x += +dx0;
1024 P[3].y += -dy0;
1025 P[4].x += -dx1;
1026 P[5].y += +dy1;
1027 P[6].x += +dx1;
1028 P[7].y += -dy1;
1029
1030 float4 L[4];
1031
1032 if(dx > -dy)
1033 {
1034 if(dx > dy) // Right
1035 {
1036 L[0] = P[1];
1037 L[1] = P[5];
1038 L[2] = P[7];
1039 L[3] = P[3];
1040 }
1041 else // Down
1042 {
1043 L[0] = P[0];
1044 L[1] = P[4];
1045 L[2] = P[6];
1046 L[3] = P[2];
1047 }
1048 }
1049 else
1050 {
1051 if(dx > dy) // Up
1052 {
1053 L[0] = P[0];
1054 L[1] = P[2];
1055 L[2] = P[6];
1056 L[3] = P[4];
1057 }
1058 else // Left
1059 {
1060 L[0] = P[1];
1061 L[1] = P[3];
1062 L[2] = P[7];
1063 L[3] = P[5];
1064 }
1065 }
1066
1067 int C0 = Clipper::ComputeClipFlags(L[0]);
1068 int C1 = Clipper::ComputeClipFlags(L[1]);
1069 int C2 = Clipper::ComputeClipFlags(L[2]);
1070 int C3 = Clipper::ComputeClipFlags(L[3]);
1071
1072 if((C0 & C1 & C2 & C3) == Clipper::CLIP_FINITE)
1073 {
1074 Polygon polygon(L, 4);
1075
1076 int clipFlagsOr = C0 | C1 | C2 | C3;
1077
1078 if(clipFlagsOr != Clipper::CLIP_FINITE)
1079 {
1080 if(!Clipper::Clip(polygon, clipFlagsOr, draw))
1081 {
1082 return false;
1083 }
1084 }
1085
1086 return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
1087 }
1088 }
1089
1090 return false;
1091 }
1092
setupPoint(Primitive & primitive,Triangle & triangle,const DrawCall & draw)1093 bool DrawCall::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1094 {
1095 const DrawData &data = *draw.data;
1096
1097 Vertex &v = triangle.v0;
1098
1099 if(v.cullMask == 0)
1100 {
1101 return false;
1102 }
1103
1104 float pSize = v.pointSize;
1105
1106 pSize = clamp(pSize, 1.0f, static_cast<float>(vk::MAX_POINT_SIZE));
1107
1108 float4 P[4];
1109 int C[4];
1110
1111 P[0] = v.position;
1112 P[1] = v.position;
1113 P[2] = v.position;
1114 P[3] = v.position;
1115
1116 const float X = pSize * P[0].w * data.halfPixelX[0];
1117 const float Y = pSize * P[0].w * data.halfPixelY[0];
1118
1119 P[0].x -= X;
1120 P[0].y += Y;
1121 C[0] = Clipper::ComputeClipFlags(P[0]);
1122
1123 P[1].x += X;
1124 P[1].y += Y;
1125 C[1] = Clipper::ComputeClipFlags(P[1]);
1126
1127 P[2].x += X;
1128 P[2].y -= Y;
1129 C[2] = Clipper::ComputeClipFlags(P[2]);
1130
1131 P[3].x -= X;
1132 P[3].y -= Y;
1133 C[3] = Clipper::ComputeClipFlags(P[3]);
1134
1135 Polygon polygon(P, 4);
1136
1137 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1138 {
1139 int clipFlagsOr = C[0] | C[1] | C[2] | C[3];
1140
1141 if(clipFlagsOr != Clipper::CLIP_FINITE)
1142 {
1143 if(!Clipper::Clip(polygon, clipFlagsOr, draw))
1144 {
1145 return false;
1146 }
1147 }
1148
1149 triangle.v1 = triangle.v0;
1150 triangle.v2 = triangle.v0;
1151
1152 constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
1153
1154 triangle.v1.projected.x += iround(subPixF * 0.5f * pSize);
1155 triangle.v2.projected.y -= iround(subPixF * 0.5f * pSize) * (data.HxF[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1156 return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
1157 }
1158
1159 return false;
1160 }
1161
addQuery(vk::Query * query)1162 void Renderer::addQuery(vk::Query *query)
1163 {
1164 ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
1165 ASSERT(!occlusionQuery);
1166
1167 occlusionQuery = query;
1168 }
1169
removeQuery(vk::Query * query)1170 void Renderer::removeQuery(vk::Query *query)
1171 {
1172 ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
1173 ASSERT(occlusionQuery == query);
1174
1175 occlusionQuery = nullptr;
1176 }
1177
1178 // TODO(b/137740918): Optimize instancing to use a single draw call.
advanceInstanceAttributes(Stream * inputs)1179 void Renderer::advanceInstanceAttributes(Stream *inputs)
1180 {
1181 for(uint32_t i = 0; i < vk::MAX_VERTEX_INPUT_BINDINGS; i++)
1182 {
1183 auto &attrib = inputs[i];
1184 if((attrib.format != VK_FORMAT_UNDEFINED) && attrib.instanceStride && (attrib.instanceStride < attrib.robustnessSize))
1185 {
1186 // Under the casts: attrib.buffer += attrib.instanceStride
1187 attrib.buffer = (void const *)((uintptr_t)attrib.buffer + attrib.instanceStride);
1188 attrib.robustnessSize -= attrib.instanceStride;
1189 }
1190 }
1191 }
1192
// Stores the viewport; Renderer::draw() reads it when filling in the
// viewport-related draw data.
void Renderer::setViewport(const VkViewport &viewport)
{
	this->viewport = viewport;
}
1197
// Stores the scissor rectangle; Renderer::draw() clamps it to the framebuffer
// extent when filling in the draw data.
void Renderer::setScissor(const VkRect2D &scissor)
{
	this->scissor = scissor;
}
1202
1203 } // namespace sw
1204