// Copyright 2016 The SwiftShader Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "Renderer.hpp" #include "Clipper.hpp" #include "Polygon.hpp" #include "Primitive.hpp" #include "Vertex.hpp" #include "Pipeline/Constants.hpp" #include "Pipeline/SpirvShader.hpp" #include "Reactor/Reactor.hpp" #include "System/Debug.hpp" #include "System/Half.hpp" #include "System/Math.hpp" #include "System/Memory.hpp" #include "System/Timer.hpp" #include "Vulkan/VkConfig.hpp" #include "Vulkan/VkDescriptorSet.hpp" #include "Vulkan/VkDevice.hpp" #include "Vulkan/VkFence.hpp" #include "Vulkan/VkImageView.hpp" #include "Vulkan/VkPipelineLayout.hpp" #include "Vulkan/VkQueryPool.hpp" #include "marl/containers.h" #include "marl/defer.h" #include "marl/trace.h" #undef max #ifndef NDEBUG unsigned int minPrimitives = 1; unsigned int maxPrimitives = 1 << 21; #endif namespace sw { template inline bool setBatchIndices(unsigned int batch[128][3], VkPrimitiveTopology topology, VkProvokingVertexModeEXT provokingVertexMode, T indices, unsigned int start, unsigned int triangleCount) { bool provokeFirst = (provokingVertexMode == VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT); switch(topology) { case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: { auto index = start; auto pointBatch = &(batch[0][0]); for(unsigned int i = 0; i < triangleCount; i++) { *pointBatch++ = indices[index++]; } // Repeat the last index to allow for SIMD width overrun. index--; for(unsigned int i = 0; i < 3; i++) { *pointBatch++ = indices[index]; } break; } case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: { auto index = 2 * start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = indices[index + (provokeFirst ? 0 : 1)]; batch[i][1] = indices[index + (provokeFirst ? 1 : 0)]; batch[i][2] = indices[index + 1]; index += 2; } break; } case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: { auto index = start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = indices[index + (provokeFirst ? 0 : 1)]; batch[i][1] = indices[index + (provokeFirst ? 1 : 0)]; batch[i][2] = indices[index + 1]; index += 1; } break; } case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: { auto index = 3 * start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = indices[index + (provokeFirst ? 0 : 2)]; batch[i][1] = indices[index + (provokeFirst ? 1 : 0)]; batch[i][2] = indices[index + (provokeFirst ? 2 : 1)]; index += 3; } break; } case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: { auto index = start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = indices[index + (provokeFirst ? 0 : 2)]; batch[i][1] = indices[index + ((start + i) & 1) + (provokeFirst ? 1 : 0)]; batch[i][2] = indices[index + (~(start + i) & 1) + (provokeFirst ? 1 : 0)]; index += 1; } break; } case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: { auto index = start + 1; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][provokeFirst ? 0 : 2] = indices[index + 0]; batch[i][provokeFirst ? 1 : 0] = indices[index + 1]; batch[i][provokeFirst ? 2 : 1] = indices[0]; index += 1; } break; } default: ASSERT(false); return false; } return true; } DrawCall::DrawCall() { data = (DrawData *)allocate(sizeof(DrawData)); data->constants = &Constants::Get(); } DrawCall::~DrawCall() { deallocate(data); } Renderer::Renderer(vk::Device *device) : device(device) { vertexProcessor.setRoutineCacheSize(1024); pixelProcessor.setRoutineCacheSize(1024); setupProcessor.setRoutineCacheSize(1024); } Renderer::~Renderer() { drawTickets.take().wait(); } // Renderer objects have to be mem aligned to the alignment provided in the class declaration void *Renderer::operator new(size_t size) { ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class return vk::allocate(sizeof(Renderer), alignof(Renderer), vk::DEVICE_MEMORY, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); } void Renderer::operator delete(void *mem) { vk::deallocate(mem, vk::DEVICE_MEMORY); } void Renderer::draw(const vk::GraphicsPipeline *pipeline, const vk::DynamicState &dynamicState, unsigned int count, int baseVertex, CountedEvent *events, int instanceID, int viewID, void *indexBuffer, const VkExtent3D &framebufferExtent, vk::Pipeline::PushConstantStorage const &pushConstants, bool update) { if(count == 0) { return; } auto id = nextDrawID++; MARL_SCOPED_EVENT("draw %d", id); marl::Pool::Loan draw; { MARL_SCOPED_EVENT("drawCallPool.borrow()"); draw = drawCallPool.borrow(); } draw->id = id; const vk::GraphicsState &pipelineState = pipeline->getState(dynamicState); pixelProcessor.setBlendConstant(pipelineState.getBlendConstants()); const vk::Inputs &inputs = pipeline->getInputs(); if(update) { MARL_SCOPED_EVENT("update"); const sw::SpirvShader *fragmentShader = pipeline->getShader(VK_SHADER_STAGE_FRAGMENT_BIT).get(); const sw::SpirvShader *vertexShader = pipeline->getShader(VK_SHADER_STAGE_VERTEX_BIT).get(); const vk::Attachments attachments = pipeline->getAttachments(); vertexState = vertexProcessor.update(pipelineState, vertexShader, inputs); setupState = setupProcessor.update(pipelineState, fragmentShader, vertexShader, attachments); pixelState = pixelProcessor.update(pipelineState, fragmentShader, vertexShader, attachments, hasOcclusionQuery()); vertexRoutine = vertexProcessor.routine(vertexState, pipelineState.getPipelineLayout(), vertexShader, inputs.getDescriptorSets()); setupRoutine = setupProcessor.routine(setupState); pixelRoutine = pixelProcessor.routine(pixelState, pipelineState.getPipelineLayout(), fragmentShader, inputs.getDescriptorSets()); } draw->containsImageWrite = pipeline->containsImageWrite(); DrawCall::SetupFunction setupPrimitives = nullptr; int ms = pipelineState.getSampleCount(); unsigned int numPrimitivesPerBatch = MaxBatchSize / ms; if(pipelineState.isDrawTriangle(false)) { switch(pipelineState.getPolygonMode()) { case VK_POLYGON_MODE_FILL: setupPrimitives = &DrawCall::setupSolidTriangles; break; case VK_POLYGON_MODE_LINE: setupPrimitives = &DrawCall::setupWireframeTriangles; numPrimitivesPerBatch /= 3; break; case VK_POLYGON_MODE_POINT: setupPrimitives = &DrawCall::setupPointTriangles; numPrimitivesPerBatch /= 3; break; default: UNSUPPORTED("polygon mode: %d", int(pipelineState.getPolygonMode())); return; } } else if(pipelineState.isDrawLine(false)) { setupPrimitives = &DrawCall::setupLines; } else // Point primitive topology { setupPrimitives = &DrawCall::setupPoints; } DrawData *data = draw->data; draw->device = device; draw->occlusionQuery = occlusionQuery; draw->batchDataPool = &batchDataPool; draw->numPrimitives = count; draw->numPrimitivesPerBatch = numPrimitivesPerBatch; draw->numBatches = (count + draw->numPrimitivesPerBatch - 1) / draw->numPrimitivesPerBatch; draw->topology = pipelineState.getTopology(); draw->provokingVertexMode = pipelineState.getProvokingVertexMode(); draw->indexType = pipeline->getIndexBuffer().getIndexType(); draw->lineRasterizationMode = pipelineState.getLineRasterizationMode(); draw->descriptorSetObjects = inputs.getDescriptorSetObjects(); draw->pipelineLayout = pipelineState.getPipelineLayout(); draw->vertexRoutine = vertexRoutine; draw->setupRoutine = setupRoutine; draw->pixelRoutine = pixelRoutine; draw->setupPrimitives = setupPrimitives; draw->setupState = setupState; data->descriptorSets = inputs.getDescriptorSets(); data->descriptorDynamicOffsets = inputs.getDescriptorDynamicOffsets(); for(int i = 0; i < MAX_INTERFACE_COMPONENTS / 4; i++) { const sw::Stream &stream = inputs.getStream(i); data->input[i] = stream.buffer; data->robustnessSize[i] = stream.robustnessSize; data->stride[i] = stream.vertexStride; } data->indices = indexBuffer; data->viewID = viewID; data->instanceID = instanceID; data->baseVertex = baseVertex; if(pixelState.stencilActive) { data->stencil[0].set(pipelineState.getFrontStencil().reference, pipelineState.getFrontStencil().compareMask, pipelineState.getFrontStencil().writeMask); data->stencil[1].set(pipelineState.getBackStencil().reference, pipelineState.getBackStencil().compareMask, pipelineState.getBackStencil().writeMask); } data->lineWidth = pipelineState.getLineWidth(); data->factor = pixelProcessor.factor; if(pixelState.alphaToCoverage) { if(ms == 4) { data->a2c0 = float4(0.2f); data->a2c1 = float4(0.4f); data->a2c2 = float4(0.6f); data->a2c3 = float4(0.8f); } else if(ms == 2) { data->a2c0 = float4(0.25f); data->a2c1 = float4(0.75f); } else if(ms == 1) { data->a2c0 = float4(0.5f); } else ASSERT(false); } if(pixelState.occlusionEnabled) { for(int cluster = 0; cluster < MaxClusterCount; cluster++) { data->occlusion[cluster] = 0; } } // Viewport { const VkViewport &viewport = pipelineState.getViewport(); float W = 0.5f * viewport.width; float H = 0.5f * viewport.height; float X0 = viewport.x + W; float Y0 = viewport.y + H; float N = viewport.minDepth; float F = viewport.maxDepth; float Z = F - N; constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR; data->WxF = float4(W * subPixF); data->HxF = float4(H * subPixF); data->X0xF = float4(X0 * subPixF - subPixF / 2); data->Y0xF = float4(Y0 * subPixF - subPixF / 2); data->halfPixelX = float4(0.5f / W); data->halfPixelY = float4(0.5f / H); data->viewportHeight = abs(viewport.height); data->depthRange = Z; data->depthNear = N; data->constantDepthBias = pipelineState.getConstantDepthBias(); data->slopeDepthBias = pipelineState.getSlopeDepthBias(); data->depthBiasClamp = pipelineState.getDepthBiasClamp(); const vk::Attachments attachments = pipeline->getAttachments(); if(attachments.depthBuffer) { switch(attachments.depthBuffer->getFormat(VK_IMAGE_ASPECT_DEPTH_BIT)) { case VK_FORMAT_D16_UNORM: data->minimumResolvableDepthDifference = 1.0f / 0xFFFF; break; case VK_FORMAT_D32_SFLOAT: // The minimum resolvable depth difference is determined per-polygon for floating-point depth // buffers. DrawData::minimumResolvableDepthDifference is unused. break; default: UNSUPPORTED("Depth format: %d", int(attachments.depthBuffer->getFormat(VK_IMAGE_ASPECT_DEPTH_BIT))); } } } // Target { const vk::Attachments attachments = pipeline->getAttachments(); for(int index = 0; index < RENDERTARGETS; index++) { draw->renderTarget[index] = attachments.renderTarget[index]; if(draw->renderTarget[index]) { data->colorBuffer[index] = (unsigned int *)attachments.renderTarget[index]->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_COLOR_BIT, 0, data->viewID); data->colorPitchB[index] = attachments.renderTarget[index]->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0); data->colorSliceB[index] = attachments.renderTarget[index]->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0); } } draw->depthBuffer = attachments.depthBuffer; draw->stencilBuffer = attachments.stencilBuffer; if(draw->depthBuffer) { data->depthBuffer = (float *)attachments.depthBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_DEPTH_BIT, 0, data->viewID); data->depthPitchB = attachments.depthBuffer->rowPitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0); data->depthSliceB = attachments.depthBuffer->slicePitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0); } if(draw->stencilBuffer) { data->stencilBuffer = (unsigned char *)attachments.stencilBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_STENCIL_BIT, 0, data->viewID); data->stencilPitchB = attachments.stencilBuffer->rowPitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0); data->stencilSliceB = attachments.stencilBuffer->slicePitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0); } } // Scissor { const VkRect2D &scissor = pipelineState.getScissor(); data->scissorX0 = clamp(scissor.offset.x, 0, framebufferExtent.width); data->scissorX1 = clamp(scissor.offset.x + scissor.extent.width, 0, framebufferExtent.width); data->scissorY0 = clamp(scissor.offset.y, 0, framebufferExtent.height); data->scissorY1 = clamp(scissor.offset.y + scissor.extent.height, 0, framebufferExtent.height); } // Push constants { data->pushConstants = pushConstants; } draw->events = events; vk::DescriptorSet::PrepareForSampling(draw->descriptorSetObjects, draw->pipelineLayout, device); DrawCall::run(draw, &drawTickets, clusterQueues); } void DrawCall::setup() { if(occlusionQuery != nullptr) { occlusionQuery->start(); } if(events) { events->add(); } } void DrawCall::teardown() { if(events) { events->done(); events = nullptr; } if(occlusionQuery != nullptr) { for(int cluster = 0; cluster < MaxClusterCount; cluster++) { occlusionQuery->add(data->occlusion[cluster]); } occlusionQuery->finish(); } vertexRoutine = {}; setupRoutine = {}; pixelRoutine = {}; for(auto *rt : renderTarget) { if(rt) { rt->contentsChanged(); } } if(containsImageWrite) { vk::DescriptorSet::ContentsChanged(descriptorSetObjects, pipelineLayout, device); } } void DrawCall::run(const marl::Loan &draw, marl::Ticket::Queue *tickets, marl::Ticket::Queue clusterQueues[MaxClusterCount]) { draw->setup(); auto const numPrimitives = draw->numPrimitives; auto const numPrimitivesPerBatch = draw->numPrimitivesPerBatch; auto const numBatches = draw->numBatches; auto ticket = tickets->take(); auto finally = marl::make_shared_finally([draw, ticket] { MARL_SCOPED_EVENT("FINISH draw %d", draw->id); draw->teardown(); ticket.done(); }); for(unsigned int batchId = 0; batchId < numBatches; batchId++) { auto batch = draw->batchDataPool->borrow(); batch->id = batchId; batch->firstPrimitive = batch->id * numPrimitivesPerBatch; batch->numPrimitives = std::min(batch->firstPrimitive + numPrimitivesPerBatch, numPrimitives) - batch->firstPrimitive; for(int cluster = 0; cluster < MaxClusterCount; cluster++) { batch->clusterTickets[cluster] = std::move(clusterQueues[cluster].take()); } marl::schedule([draw, batch, finally] { processVertices(draw.get(), batch.get()); if(!draw->setupState.rasterizerDiscard) { processPrimitives(draw.get(), batch.get()); if(batch->numVisible > 0) { processPixels(draw, batch, finally); return; } } for(int cluster = 0; cluster < MaxClusterCount; cluster++) { batch->clusterTickets[cluster].done(); } }); } } void DrawCall::processVertices(DrawCall *draw, BatchData *batch) { MARL_SCOPED_EVENT("VERTEX draw %d, batch %d", draw->id, batch->id); unsigned int triangleIndices[MaxBatchSize + 1][3]; // One extra for SIMD width overrun. TODO: Adjust to dynamic batch size. { MARL_SCOPED_EVENT("processPrimitiveVertices"); processPrimitiveVertices( triangleIndices, draw->data->indices, draw->indexType, batch->firstPrimitive, batch->numPrimitives, draw->topology, draw->provokingVertexMode); } auto &vertexTask = batch->vertexTask; vertexTask.primitiveStart = batch->firstPrimitive; // We're only using batch compaction for points, not lines vertexTask.vertexCount = batch->numPrimitives * ((draw->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) ? 1 : 3); if(vertexTask.vertexCache.drawCall != draw->id) { vertexTask.vertexCache.clear(); vertexTask.vertexCache.drawCall = draw->id; } draw->vertexRoutine(&batch->triangles.front().v0, &triangleIndices[0][0], &vertexTask, draw->data); } void DrawCall::processPrimitives(DrawCall *draw, BatchData *batch) { MARL_SCOPED_EVENT("PRIMITIVES draw %d batch %d", draw->id, batch->id); auto triangles = &batch->triangles[0]; auto primitives = &batch->primitives[0]; batch->numVisible = draw->setupPrimitives(triangles, primitives, draw, batch->numPrimitives); } void DrawCall::processPixels(const marl::Loan &draw, const marl::Loan &batch, const std::shared_ptr &finally) { struct Data { Data(const marl::Loan &draw, const marl::Loan &batch, const std::shared_ptr &finally) : draw(draw) , batch(batch) , finally(finally) {} marl::Loan draw; marl::Loan batch; std::shared_ptr finally; }; auto data = std::make_shared(draw, batch, finally); for(int cluster = 0; cluster < MaxClusterCount; cluster++) { batch->clusterTickets[cluster].onCall([data, cluster] { auto &draw = data->draw; auto &batch = data->batch; MARL_SCOPED_EVENT("PIXEL draw %d, batch %d, cluster %d", draw->id, batch->id, cluster); draw->pixelRoutine(&batch->primitives.front(), batch->numVisible, cluster, MaxClusterCount, draw->data); batch->clusterTickets[cluster].done(); }); } } void Renderer::synchronize() { MARL_SCOPED_EVENT("synchronize"); auto ticket = drawTickets.take(); ticket.wait(); device->updateSamplingRoutineSnapshotCache(); ticket.done(); } void DrawCall::processPrimitiveVertices( unsigned int triangleIndicesOut[MaxBatchSize + 1][3], const void *primitiveIndices, VkIndexType indexType, unsigned int start, unsigned int triangleCount, VkPrimitiveTopology topology, VkProvokingVertexModeEXT provokingVertexMode) { if(!primitiveIndices) { struct LinearIndex { unsigned int operator[](unsigned int i) { return i; } }; if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, LinearIndex(), start, triangleCount)) { return; } } else { switch(indexType) { case VK_INDEX_TYPE_UINT16: if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast(primitiveIndices), start, triangleCount)) { return; } break; case VK_INDEX_TYPE_UINT32: if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast(primitiveIndices), start, triangleCount)) { return; } break; break; default: ASSERT(false); return; } } // setBatchIndices() takes care of the point case, since it's different due to the compaction if(topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST) { // Repeat the last index to allow for SIMD width overrun. triangleIndicesOut[triangleCount][0] = triangleIndicesOut[triangleCount - 1][2]; triangleIndicesOut[triangleCount][1] = triangleIndicesOut[triangleCount - 1][2]; triangleIndicesOut[triangleCount][2] = triangleIndicesOut[triangleCount - 1][2]; } } int DrawCall::setupSolidTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count) { auto &state = drawCall->setupState; int ms = state.multiSampleCount; const DrawData *data = drawCall->data; int visible = 0; for(int i = 0; i < count; i++, triangles++) { Vertex &v0 = triangles->v0; Vertex &v1 = triangles->v1; Vertex &v2 = triangles->v2; Polygon polygon(&v0.position, &v1.position, &v2.position); if((v0.cullMask | v1.cullMask | v2.cullMask) == 0) { continue; } if((v0.clipFlags & v1.clipFlags & v2.clipFlags) != Clipper::CLIP_FINITE) { continue; } int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags; if(clipFlagsOr != Clipper::CLIP_FINITE) { if(!Clipper::Clip(polygon, clipFlagsOr, *drawCall)) { continue; } } if(drawCall->setupRoutine(primitives, triangles, &polygon, data)) { primitives += ms; visible++; } } return visible; } int DrawCall::setupWireframeTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count) { auto &state = drawCall->setupState; int ms = state.multiSampleCount; int visible = 0; for(int i = 0; i < count; i++) { const Vertex &v0 = triangles[i].v0; const Vertex &v1 = triangles[i].v1; const Vertex &v2 = triangles[i].v2; float A = ((float)v0.projected.y - (float)v2.projected.y) * (float)v1.projected.x + ((float)v2.projected.y - (float)v1.projected.y) * (float)v0.projected.x + ((float)v1.projected.y - (float)v0.projected.y) * (float)v2.projected.x; // Area int w0w1w2 = bit_cast(v0.w) ^ bit_cast(v1.w) ^ bit_cast(v2.w); A = w0w1w2 < 0 ? -A : A; bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (A >= 0.0f) : (A <= 0.0f); if(state.cullMode & VK_CULL_MODE_FRONT_BIT) { if(frontFacing) continue; } if(state.cullMode & VK_CULL_MODE_BACK_BIT) { if(!frontFacing) continue; } Triangle lines[3]; lines[0].v0 = v0; lines[0].v1 = v1; lines[1].v0 = v1; lines[1].v1 = v2; lines[2].v0 = v2; lines[2].v1 = v0; for(int i = 0; i < 3; i++) { if(setupLine(*primitives, lines[i], *drawCall)) { primitives += ms; visible++; } } } return visible; } int DrawCall::setupPointTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count) { auto &state = drawCall->setupState; int ms = state.multiSampleCount; int visible = 0; for(int i = 0; i < count; i++) { const Vertex &v0 = triangles[i].v0; const Vertex &v1 = triangles[i].v1; const Vertex &v2 = triangles[i].v2; float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0); if(state.cullMode & VK_CULL_MODE_FRONT_BIT) { if(frontFacing) continue; } if(state.cullMode & VK_CULL_MODE_BACK_BIT) { if(!frontFacing) continue; } Triangle points[3]; points[0].v0 = v0; points[1].v0 = v1; points[2].v0 = v2; for(int i = 0; i < 3; i++) { if(setupPoint(*primitives, points[i], *drawCall)) { primitives += ms; visible++; } } } return visible; } int DrawCall::setupLines(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count) { auto &state = drawCall->setupState; int visible = 0; int ms = state.multiSampleCount; for(int i = 0; i < count; i++) { if(setupLine(*primitives, *triangles, *drawCall)) { primitives += ms; visible++; } triangles++; } return visible; } int DrawCall::setupPoints(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count) { auto &state = drawCall->setupState; int visible = 0; int ms = state.multiSampleCount; for(int i = 0; i < count; i++) { if(setupPoint(*primitives, *triangles, *drawCall)) { primitives += ms; visible++; } triangles++; } return visible; } bool DrawCall::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw) { const DrawData &data = *draw.data; float lineWidth = data.lineWidth; Vertex &v0 = triangle.v0; Vertex &v1 = triangle.v1; if((v0.cullMask | v1.cullMask) == 0) { return false; } const float4 &P0 = v0.position; const float4 &P1 = v1.position; if(P0.w <= 0 && P1.w <= 0) { return false; } constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR; const float W = data.WxF[0] * (1.0f / subPixF); const float H = data.HxF[0] * (1.0f / subPixF); float dx = W * (P1.x / P1.w - P0.x / P0.w); float dy = H * (P1.y / P1.w - P0.y / P0.w); if(dx == 0 && dy == 0) { return false; } if(draw.lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT) { // Rectangle centered on the line segment float4 P[4]; int C[4]; P[0] = P0; P[1] = P1; P[2] = P1; P[3] = P0; float scale = lineWidth * 0.5f / sqrt(dx * dx + dy * dy); dx *= scale; dy *= scale; float dx0h = dx * P0.w / H; float dy0w = dy * P0.w / W; float dx1h = dx * P1.w / H; float dy1w = dy * P1.w / W; P[0].x += -dy0w; P[0].y += +dx0h; C[0] = Clipper::ComputeClipFlags(P[0]); P[1].x += -dy1w; P[1].y += +dx1h; C[1] = Clipper::ComputeClipFlags(P[1]); P[2].x += +dy1w; P[2].y += -dx1h; C[2] = Clipper::ComputeClipFlags(P[2]); P[3].x += +dy0w; P[3].y += -dx0h; C[3] = Clipper::ComputeClipFlags(P[3]); if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) { Polygon polygon(P, 4); int clipFlagsOr = C[0] | C[1] | C[2] | C[3]; if(clipFlagsOr != Clipper::CLIP_FINITE) { if(!Clipper::Clip(polygon, clipFlagsOr, draw)) { return false; } } return draw.setupRoutine(&primitive, &triangle, &polygon, &data); } } else if(false) // TODO(b/80135519): Deprecate { // Connecting diamonds polygon // This shape satisfies the diamond test convention, except for the exit rule part. // Line segments with overlapping endpoints have duplicate fragments. // The ideal algorithm requires half-open line rasterization (b/80135519). float4 P[8]; int C[8]; P[0] = P0; P[1] = P0; P[2] = P0; P[3] = P0; P[4] = P1; P[5] = P1; P[6] = P1; P[7] = P1; float dx0 = lineWidth * 0.5f * P0.w / W; float dy0 = lineWidth * 0.5f * P0.w / H; float dx1 = lineWidth * 0.5f * P1.w / W; float dy1 = lineWidth * 0.5f * P1.w / H; P[0].x += -dx0; C[0] = Clipper::ComputeClipFlags(P[0]); P[1].y += +dy0; C[1] = Clipper::ComputeClipFlags(P[1]); P[2].x += +dx0; C[2] = Clipper::ComputeClipFlags(P[2]); P[3].y += -dy0; C[3] = Clipper::ComputeClipFlags(P[3]); P[4].x += -dx1; C[4] = Clipper::ComputeClipFlags(P[4]); P[5].y += +dy1; C[5] = Clipper::ComputeClipFlags(P[5]); P[6].x += +dx1; C[6] = Clipper::ComputeClipFlags(P[6]); P[7].y += -dy1; C[7] = Clipper::ComputeClipFlags(P[7]); if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE) { float4 L[6]; if(dx > -dy) { if(dx > dy) // Right { L[0] = P[0]; L[1] = P[1]; L[2] = P[5]; L[3] = P[6]; L[4] = P[7]; L[5] = P[3]; } else // Down { L[0] = P[0]; L[1] = P[4]; L[2] = P[5]; L[3] = P[6]; L[4] = P[2]; L[5] = P[3]; } } else { if(dx > dy) // Up { L[0] = P[0]; L[1] = P[1]; L[2] = P[2]; L[3] = P[6]; L[4] = P[7]; L[5] = P[4]; } else // Left { L[0] = P[1]; L[1] = P[2]; L[2] = P[3]; L[3] = P[7]; L[4] = P[4]; L[5] = P[5]; } } Polygon polygon(L, 6); int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7]; if(clipFlagsOr != Clipper::CLIP_FINITE) { if(!Clipper::Clip(polygon, clipFlagsOr, draw)) { return false; } } return draw.setupRoutine(&primitive, &triangle, &polygon, &data); } } else { // Parallelogram approximating Bresenham line // This algorithm does not satisfy the ideal diamond-exit rule, but does avoid the // duplicate fragment rasterization problem and satisfies all of Vulkan's minimum // requirements for Bresenham line segment rasterization. float4 P[8]; P[0] = P0; P[1] = P0; P[2] = P0; P[3] = P0; P[4] = P1; P[5] = P1; P[6] = P1; P[7] = P1; float dx0 = lineWidth * 0.5f * P0.w / W; float dy0 = lineWidth * 0.5f * P0.w / H; float dx1 = lineWidth * 0.5f * P1.w / W; float dy1 = lineWidth * 0.5f * P1.w / H; P[0].x += -dx0; P[1].y += +dy0; P[2].x += +dx0; P[3].y += -dy0; P[4].x += -dx1; P[5].y += +dy1; P[6].x += +dx1; P[7].y += -dy1; float4 L[4]; if(dx > -dy) { if(dx > dy) // Right { L[0] = P[1]; L[1] = P[5]; L[2] = P[7]; L[3] = P[3]; } else // Down { L[0] = P[0]; L[1] = P[4]; L[2] = P[6]; L[3] = P[2]; } } else { if(dx > dy) // Up { L[0] = P[0]; L[1] = P[2]; L[2] = P[6]; L[3] = P[4]; } else // Left { L[0] = P[1]; L[1] = P[3]; L[2] = P[7]; L[3] = P[5]; } } int C0 = Clipper::ComputeClipFlags(L[0]); int C1 = Clipper::ComputeClipFlags(L[1]); int C2 = Clipper::ComputeClipFlags(L[2]); int C3 = Clipper::ComputeClipFlags(L[3]); if((C0 & C1 & C2 & C3) == Clipper::CLIP_FINITE) { Polygon polygon(L, 4); int clipFlagsOr = C0 | C1 | C2 | C3; if(clipFlagsOr != Clipper::CLIP_FINITE) { if(!Clipper::Clip(polygon, clipFlagsOr, draw)) { return false; } } return draw.setupRoutine(&primitive, &triangle, &polygon, &data); } } return false; } bool DrawCall::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw) { const DrawData &data = *draw.data; Vertex &v = triangle.v0; if(v.cullMask == 0) { return false; } float pSize = v.pointSize; pSize = clamp(pSize, 1.0f, static_cast(vk::MAX_POINT_SIZE)); float4 P[4]; int C[4]; P[0] = v.position; P[1] = v.position; P[2] = v.position; P[3] = v.position; const float X = pSize * P[0].w * data.halfPixelX[0]; const float Y = pSize * P[0].w * data.halfPixelY[0]; P[0].x -= X; P[0].y += Y; C[0] = Clipper::ComputeClipFlags(P[0]); P[1].x += X; P[1].y += Y; C[1] = Clipper::ComputeClipFlags(P[1]); P[2].x += X; P[2].y -= Y; C[2] = Clipper::ComputeClipFlags(P[2]); P[3].x -= X; P[3].y -= Y; C[3] = Clipper::ComputeClipFlags(P[3]); Polygon polygon(P, 4); if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) { int clipFlagsOr = C[0] | C[1] | C[2] | C[3]; if(clipFlagsOr != Clipper::CLIP_FINITE) { if(!Clipper::Clip(polygon, clipFlagsOr, draw)) { return false; } } primitive.pointSizeInv = 1.0f / pSize; return draw.setupRoutine(&primitive, &triangle, &polygon, &data); } return false; } void Renderer::addQuery(vk::Query *query) { ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION); ASSERT(!occlusionQuery); occlusionQuery = query; } void Renderer::removeQuery(vk::Query *query) { ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION); ASSERT(occlusionQuery == query); occlusionQuery = nullptr; } } // namespace sw