// Copyright 2018 The SwiftShader Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "VkQueue.hpp" #include "VkCommandBuffer.hpp" #include "VkFence.hpp" #include "VkSemaphore.hpp" #include "VkStringify.hpp" #include "VkTimelineSemaphore.hpp" #include "Device/Renderer.hpp" #include "WSI/VkSwapchainKHR.hpp" #include "marl/defer.h" #include "marl/scheduler.h" #include "marl/thread.h" #include "marl/trace.h" #include namespace { VkSubmitInfo *DeepCopySubmitInfo(uint32_t submitCount, const VkSubmitInfo *pSubmits) { size_t submitSize = sizeof(VkSubmitInfo) * submitCount; size_t totalSize = submitSize; for(uint32_t i = 0; i < submitCount; i++) { totalSize += pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore); totalSize += pSubmits[i].waitSemaphoreCount * sizeof(VkPipelineStageFlags); totalSize += pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore); totalSize += pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer); for(const auto *extension = reinterpret_cast(pSubmits[i].pNext); extension != nullptr; extension = reinterpret_cast(extension->pNext)) { switch(extension->sType) { case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO: { const auto *tlsSubmitInfo = reinterpret_cast(extension); totalSize += sizeof(VkTimelineSemaphoreSubmitInfo); totalSize += tlsSubmitInfo->waitSemaphoreValueCount * sizeof(uint64_t); totalSize += tlsSubmitInfo->signalSemaphoreValueCount * sizeof(uint64_t); } break; default: WARN("submitInfo[%d]->pNext sType: %s", i, vk::Stringify(extension->sType).c_str()); break; } } } uint8_t *mem = static_cast( vk::allocate(totalSize, vk::REQUIRED_MEMORY_ALIGNMENT, vk::DEVICE_MEMORY, vk::Fence::GetAllocationScope())); auto submits = new(mem) VkSubmitInfo[submitCount]; memcpy(mem, pSubmits, submitSize); mem += submitSize; for(uint32_t i = 0; i < submitCount; i++) { size_t size = pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore); submits[i].pWaitSemaphores = reinterpret_cast(mem); memcpy(mem, pSubmits[i].pWaitSemaphores, size); mem += size; size = pSubmits[i].waitSemaphoreCount * sizeof(VkPipelineStageFlags); submits[i].pWaitDstStageMask = reinterpret_cast(mem); memcpy(mem, pSubmits[i].pWaitDstStageMask, size); mem += size; size = pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore); submits[i].pSignalSemaphores = reinterpret_cast(mem); memcpy(mem, pSubmits[i].pSignalSemaphores, size); mem += size; size = pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer); submits[i].pCommandBuffers = reinterpret_cast(mem); memcpy(mem, pSubmits[i].pCommandBuffers, size); mem += size; for(const auto *extension = reinterpret_cast(pSubmits[i].pNext); extension != nullptr; extension = reinterpret_cast(extension->pNext)) { switch(extension->sType) { case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO: { const VkTimelineSemaphoreSubmitInfo *tlsSubmitInfo = reinterpret_cast(extension); size = sizeof(VkTimelineSemaphoreSubmitInfo); VkTimelineSemaphoreSubmitInfo *tlsSubmitInfoCopy = reinterpret_cast(mem); memcpy(mem, extension, size); // Don't copy the pNext pointer at all. tlsSubmitInfoCopy->pNext = nullptr; mem += size; size = tlsSubmitInfo->waitSemaphoreValueCount * sizeof(uint64_t); tlsSubmitInfoCopy->pWaitSemaphoreValues = reinterpret_cast(mem); memcpy(mem, tlsSubmitInfo->pWaitSemaphoreValues, size); mem += size; size = tlsSubmitInfo->signalSemaphoreValueCount * sizeof(uint64_t); tlsSubmitInfoCopy->pSignalSemaphoreValues = reinterpret_cast(mem); memcpy(mem, tlsSubmitInfo->pSignalSemaphoreValues, size); mem += size; submits[i].pNext = tlsSubmitInfoCopy; } break; default: WARN("submitInfo[%d]->pNext sType: %s", i, vk::Stringify(extension->sType).c_str()); break; } } } return submits; } } // anonymous namespace namespace vk { Queue::Queue(Device *device, marl::Scheduler *scheduler) : device(device) { queueThread = std::thread(&Queue::taskLoop, this, scheduler); } Queue::~Queue() { Task task; task.type = Task::KILL_THREAD; pending.put(task); queueThread.join(); ASSERT_MSG(pending.count() == 0, "queue has work after worker thread shutdown"); garbageCollect(); } VkResult Queue::submit(uint32_t submitCount, const VkSubmitInfo *pSubmits, Fence *fence) { garbageCollect(); Task task; task.submitCount = submitCount; task.pSubmits = DeepCopySubmitInfo(submitCount, pSubmits); if(fence) { task.events = fence->getCountedEvent(); task.events->add(); } pending.put(task); return VK_SUCCESS; } void Queue::submitQueue(const Task &task) { if(renderer == nullptr) { renderer.reset(new sw::Renderer(device)); } for(uint32_t i = 0; i < task.submitCount; i++) { VkSubmitInfo &submitInfo = task.pSubmits[i]; const VkTimelineSemaphoreSubmitInfo *timelineInfo = nullptr; for(const auto *nextInfo = reinterpret_cast(submitInfo.pNext); nextInfo != nullptr; nextInfo = nextInfo->pNext) { switch(nextInfo->sType) { case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO: timelineInfo = reinterpret_cast(submitInfo.pNext); break; default: WARN("submitInfo.pNext->sType = %s", vk::Stringify(nextInfo->sType).c_str()); break; } } for(uint32_t j = 0; j < submitInfo.waitSemaphoreCount; j++) { if(auto *sem = DynamicCast(submitInfo.pWaitSemaphores[j])) { ASSERT_MSG(timelineInfo != nullptr, "the pNext chain must include a VkTimelineSemaphoreSubmitInfo if timeline semaphores are used"); sem->wait(timelineInfo->pWaitSemaphoreValues[j]); } else if(auto *sem = DynamicCast(submitInfo.pWaitSemaphores[j])) { sem->wait(submitInfo.pWaitDstStageMask[j]); } else { UNSUPPORTED("Unknown semaphore type"); } } { CommandBuffer::ExecutionState executionState; executionState.renderer = renderer.get(); executionState.events = task.events.get(); for(uint32_t j = 0; j < submitInfo.commandBufferCount; j++) { Cast(submitInfo.pCommandBuffers[j])->submit(executionState); } } for(uint32_t j = 0; j < submitInfo.signalSemaphoreCount; j++) { if(auto *sem = DynamicCast(submitInfo.pSignalSemaphores[j])) { ASSERT_MSG(timelineInfo != nullptr, "the pNext chain must include a VkTimelineSemaphoreSubmitInfo if timeline semaphores are used"); sem->signal(timelineInfo->pSignalSemaphoreValues[j]); } else if(auto *sem = DynamicCast(submitInfo.pSignalSemaphores[j])) { sem->signal(); } else { UNSUPPORTED("Unknown semaphore type"); } } } if(task.pSubmits) { toDelete.put(task.pSubmits); } if(task.events) { // TODO: fix renderer signaling so that work submitted separately from (but before) a fence // is guaranteed complete by the time the fence signals. renderer->synchronize(); task.events->done(); } } void Queue::taskLoop(marl::Scheduler *scheduler) { marl::Thread::setName("Queue<%p>", this); scheduler->bind(); defer(scheduler->unbind()); while(true) { Task task = pending.take(); switch(task.type) { case Task::KILL_THREAD: ASSERT_MSG(pending.count() == 0, "queue has remaining work!"); return; case Task::SUBMIT_QUEUE: submitQueue(task); break; default: UNREACHABLE("task.type %d", static_cast(task.type)); break; } } } VkResult Queue::waitIdle() { // Wait for task queue to flush. auto event = std::make_shared(); event->add(); // done() is called at the end of submitQueue() Task task; task.events = event; pending.put(task); event->wait(); garbageCollect(); return VK_SUCCESS; } void Queue::garbageCollect() { while(true) { auto v = toDelete.tryTake(); if(!v.second) { break; } vk::deallocate(v.first, DEVICE_MEMORY); } } #ifndef __ANDROID__ VkResult Queue::present(const VkPresentInfoKHR *presentInfo) { // This is a hack to deal with screen tearing for now. // Need to correctly implement threading using VkSemaphore // to get rid of it. b/132458423 waitIdle(); for(uint32_t i = 0; i < presentInfo->waitSemaphoreCount; i++) { auto *semaphore = vk::DynamicCast(presentInfo->pWaitSemaphores[i]); semaphore->wait(); } VkResult commandResult = VK_SUCCESS; for(uint32_t i = 0; i < presentInfo->swapchainCount; i++) { auto *swapchain = vk::Cast(presentInfo->pSwapchains[i]); VkResult perSwapchainResult = swapchain->present(presentInfo->pImageIndices[i]); if(presentInfo->pResults) { presentInfo->pResults[i] = perSwapchainResult; } // Keep track of the worst result code. VK_SUBOPTIMAL_KHR is a success code so it should // not override failure codes, but should not get replaced by a VK_SUCCESS result itself. if(perSwapchainResult != VK_SUCCESS) { if(commandResult == VK_SUCCESS || commandResult == VK_SUBOPTIMAL_KHR) { commandResult = perSwapchainResult; } } } return commandResult; } #endif void Queue::beginDebugUtilsLabel(const VkDebugUtilsLabelEXT *pLabelInfo) { // Optional debug label region } void Queue::endDebugUtilsLabel() { // Close debug label region opened with beginDebugUtilsLabel() } void Queue::insertDebugUtilsLabel(const VkDebugUtilsLabelEXT *pLabelInfo) { // Optional single debug label } } // namespace vk