1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "VkDevice.hpp"
16
17 #include "VkConfig.hpp"
18 #include "VkDescriptorSetLayout.hpp"
19 #include "VkFence.hpp"
20 #include "VkQueue.hpp"
21 #include "VkSemaphore.hpp"
22 #include "VkStringify.hpp"
23 #include "VkTimelineSemaphore.hpp"
24 #include "Debug/Context.hpp"
25 #include "Debug/Server.hpp"
26 #include "Device/Blitter.hpp"
27 #include "System/Debug.hpp"
28
29 #include <chrono>
30 #include <climits>
31 #include <new> // Must #include this to use "placement new"
32
namespace {

// Wall-clock time point with nanosecond resolution, used for all timeout computations in this file.
using time_point = std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;

// Returns the current system_clock time expressed in nanoseconds.
time_point now()
{
	const auto current = std::chrono::system_clock::now();
	return std::chrono::time_point_cast<time_point::duration>(current);
}

// Converts a relative timeout (in nanoseconds) into an absolute deadline.
//
// The deadline is clamped so that its nanosecond count never exceeds LLONG_MAX.
// infiniteTimeout is set to true when the requested timeout had to be clamped
// (i.e. it cannot be represented as a deadline), and to false otherwise.
const time_point getEndTimePoint(uint64_t timeout, bool &infiniteTimeout)
{
	const time_point begin = now();

	// Largest number of nanoseconds that can be added to 'begin' without overflowing.
	const uint64_t longestRepresentable = (LLONG_MAX - begin.time_since_epoch().count());

	infiniteTimeout = (timeout > longestRepresentable);

	const uint64_t clamped = infiniteTimeout ? longestRepresentable : timeout;
	return begin + std::chrono::nanoseconds(clamped);
}

}  // anonymous namespace
51
52 namespace vk {
53
updateSnapshot()54 void Device::SamplingRoutineCache::updateSnapshot()
55 {
56 marl::lock lock(mutex);
57
58 if(snapshotNeedsUpdate)
59 {
60 snapshot.clear();
61
62 for(auto it : cache)
63 {
64 snapshot[it.key()] = it.data();
65 }
66
67 snapshotNeedsUpdate = false;
68 }
69 }
70
// Destructor. Asserts that the map is empty, i.e. that every entry added by
// index() has been released again through remove() before the indexer goes away.
Device::SamplerIndexer::~SamplerIndexer()
{
	ASSERT(map.empty());
}
75
index(const SamplerState & samplerState)76 uint32_t Device::SamplerIndexer::index(const SamplerState &samplerState)
77 {
78 marl::lock lock(mutex);
79
80 auto it = map.find(samplerState);
81
82 if(it != map.end())
83 {
84 it->second.count++;
85 return it->second.id;
86 }
87
88 nextID++;
89
90 map.emplace(samplerState, Identifier{ nextID, 1 });
91
92 return nextID;
93 }
94
remove(const SamplerState & samplerState)95 void Device::SamplerIndexer::remove(const SamplerState &samplerState)
96 {
97 marl::lock lock(mutex);
98
99 auto it = map.find(samplerState);
100 ASSERT(it != map.end());
101
102 auto count = --it->second.count;
103 if(count == 0)
104 {
105 map.erase(it);
106 }
107 }
108
find(uint32_t id)109 const SamplerState *Device::SamplerIndexer::find(uint32_t id)
110 {
111 marl::lock lock(mutex);
112
113 auto it = std::find_if(std::begin(map), std::end(map),
114 [&id](auto &&p) { return p.second.id == id; });
115
116 return (it != std::end(map)) ? &(it->first) : nullptr;
117 }
118
Device(const VkDeviceCreateInfo * pCreateInfo,void * mem,PhysicalDevice * physicalDevice,const VkPhysicalDeviceFeatures * enabledFeatures,const std::shared_ptr<marl::Scheduler> & scheduler)119 Device::Device(const VkDeviceCreateInfo *pCreateInfo, void *mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures, const std::shared_ptr<marl::Scheduler> &scheduler)
120 : physicalDevice(physicalDevice)
121 , queues(reinterpret_cast<Queue *>(mem))
122 , enabledExtensionCount(pCreateInfo->enabledExtensionCount)
123 , enabledFeatures(enabledFeatures ? *enabledFeatures : VkPhysicalDeviceFeatures{}) // "Setting pEnabledFeatures to NULL and not including a VkPhysicalDeviceFeatures2 in the pNext member of VkDeviceCreateInfo is equivalent to setting all members of the structure to VK_FALSE."
124 , scheduler(scheduler)
125 {
126 for(uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
127 {
128 const VkDeviceQueueCreateInfo &queueCreateInfo = pCreateInfo->pQueueCreateInfos[i];
129 queueCount += queueCreateInfo.queueCount;
130 }
131
132 uint32_t queueID = 0;
133 for(uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
134 {
135 const VkDeviceQueueCreateInfo &queueCreateInfo = pCreateInfo->pQueueCreateInfos[i];
136
137 for(uint32_t j = 0; j < queueCreateInfo.queueCount; j++, queueID++)
138 {
139 new(&queues[queueID]) Queue(this, scheduler.get());
140 }
141 }
142
143 extensions = reinterpret_cast<ExtensionName *>(static_cast<uint8_t *>(mem) + (sizeof(Queue) * queueCount));
144 for(uint32_t i = 0; i < enabledExtensionCount; i++)
145 {
146 strncpy(extensions[i], pCreateInfo->ppEnabledExtensionNames[i], VK_MAX_EXTENSION_NAME_SIZE);
147 }
148
149 if(pCreateInfo->enabledLayerCount)
150 {
151 // "The ppEnabledLayerNames and enabledLayerCount members of VkDeviceCreateInfo are deprecated and their values must be ignored by implementations."
152 UNSUPPORTED("enabledLayerCount");
153 }
154
155 // TODO(b/119409619): use an allocator here so we can control all memory allocations
156 blitter.reset(new sw::Blitter());
157 samplingRoutineCache.reset(new SamplingRoutineCache());
158 samplerIndexer.reset(new SamplerIndexer());
159
160 #ifdef SWIFTSHADER_DEVICE_MEMORY_REPORT
161 const auto *deviceMemoryReportCreateInfo = GetExtendedStruct<VkDeviceDeviceMemoryReportCreateInfoEXT>(pCreateInfo->pNext, VK_STRUCTURE_TYPE_DEVICE_DEVICE_MEMORY_REPORT_CREATE_INFO_EXT);
162 if(deviceMemoryReportCreateInfo && deviceMemoryReportCreateInfo->pfnUserCallback != nullptr)
163 {
164 deviceMemoryReportCallbacks.emplace_back(deviceMemoryReportCreateInfo->pfnUserCallback, deviceMemoryReportCreateInfo->pUserData);
165 }
166 #endif // SWIFTSHADER_DEVICE_MEMORY_REPORT
167 }
168
destroy(const VkAllocationCallbacks * pAllocator)169 void Device::destroy(const VkAllocationCallbacks *pAllocator)
170 {
171 for(uint32_t i = 0; i < queueCount; i++)
172 {
173 queues[i].~Queue();
174 }
175
176 vk::freeHostMemory(queues, pAllocator);
177 }
178
ComputeRequiredAllocationSize(const VkDeviceCreateInfo * pCreateInfo)179 size_t Device::ComputeRequiredAllocationSize(const VkDeviceCreateInfo *pCreateInfo)
180 {
181 uint32_t queueCount = 0;
182 for(uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
183 {
184 queueCount += pCreateInfo->pQueueCreateInfos[i].queueCount;
185 }
186
187 return (sizeof(Queue) * queueCount) + (pCreateInfo->enabledExtensionCount * sizeof(ExtensionName));
188 }
189
hasExtension(const char * extensionName) const190 bool Device::hasExtension(const char *extensionName) const
191 {
192 for(uint32_t i = 0; i < enabledExtensionCount; i++)
193 {
194 if(strncmp(extensions[i], extensionName, VK_MAX_EXTENSION_NAME_SIZE) == 0)
195 {
196 return true;
197 }
198 }
199 return false;
200 }
201
getQueue(uint32_t queueFamilyIndex,uint32_t queueIndex) const202 VkQueue Device::getQueue(uint32_t queueFamilyIndex, uint32_t queueIndex) const
203 {
204 ASSERT(queueFamilyIndex == 0);
205
206 return queues[queueIndex];
207 }
208
waitForFences(uint32_t fenceCount,const VkFence * pFences,VkBool32 waitAll,uint64_t timeout)209 VkResult Device::waitForFences(uint32_t fenceCount, const VkFence *pFences, VkBool32 waitAll, uint64_t timeout)
210 {
211 bool infiniteTimeout = false;
212 const time_point end_ns = getEndTimePoint(timeout, infiniteTimeout);
213
214 if(waitAll != VK_FALSE) // All fences must be signaled
215 {
216 for(uint32_t i = 0; i < fenceCount; i++)
217 {
218 if(timeout == 0)
219 {
220 if(Cast(pFences[i])->getStatus() != VK_SUCCESS) // At least one fence is not signaled
221 {
222 return VK_TIMEOUT;
223 }
224 }
225 else if(infiniteTimeout)
226 {
227 if(Cast(pFences[i])->wait() != VK_SUCCESS) // At least one fence is not signaled
228 {
229 return VK_TIMEOUT;
230 }
231 }
232 else
233 {
234 if(Cast(pFences[i])->wait(end_ns) != VK_SUCCESS) // At least one fence is not signaled
235 {
236 return VK_TIMEOUT;
237 }
238 }
239 }
240
241 return VK_SUCCESS;
242 }
243 else // At least one fence must be signaled
244 {
245 marl::containers::vector<marl::Event, 8> events;
246 for(uint32_t i = 0; i < fenceCount; i++)
247 {
248 events.push_back(Cast(pFences[i])->getCountedEvent()->event());
249 }
250
251 auto any = marl::Event::any(events.begin(), events.end());
252
253 if(timeout == 0)
254 {
255 return any.isSignalled() ? VK_SUCCESS : VK_TIMEOUT;
256 }
257 else if(infiniteTimeout)
258 {
259 any.wait();
260 return VK_SUCCESS;
261 }
262 else
263 {
264 return any.wait_until(end_ns) ? VK_SUCCESS : VK_TIMEOUT;
265 }
266 }
267 }
268
waitForSemaphores(const VkSemaphoreWaitInfo * pWaitInfo,uint64_t timeout)269 VkResult Device::waitForSemaphores(const VkSemaphoreWaitInfo *pWaitInfo, uint64_t timeout)
270 {
271 bool infiniteTimeout = false;
272 const time_point end_ns = getEndTimePoint(timeout, infiniteTimeout);
273
274 if(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT)
275 {
276 TimelineSemaphore any = TimelineSemaphore();
277
278 for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++)
279 {
280 TimelineSemaphore *semaphore = DynamicCast<TimelineSemaphore>(pWaitInfo->pSemaphores[i]);
281 uint64_t waitValue = pWaitInfo->pValues[i];
282
283 if(semaphore->getCounterValue() >= waitValue)
284 {
285 return VK_SUCCESS;
286 }
287
288 semaphore->addDependent(any, waitValue);
289 }
290
291 if(infiniteTimeout)
292 {
293 any.wait(1ull);
294 return VK_SUCCESS;
295 }
296 else
297 {
298 if(any.wait(1, end_ns) == VK_SUCCESS)
299 {
300 return VK_SUCCESS;
301 }
302 }
303
304 return VK_TIMEOUT;
305 }
306 else
307 {
308 ASSERT(pWaitInfo->flags == 0);
309 for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++)
310 {
311 TimelineSemaphore *semaphore = DynamicCast<TimelineSemaphore>(pWaitInfo->pSemaphores[i]);
312 uint64_t value = pWaitInfo->pValues[i];
313 if(infiniteTimeout)
314 {
315 semaphore->wait(value);
316 }
317 else if(semaphore->wait(pWaitInfo->pValues[i], end_ns) != VK_SUCCESS)
318 {
319 return VK_TIMEOUT;
320 }
321 }
322 return VK_SUCCESS;
323 }
324 }
325
waitIdle()326 VkResult Device::waitIdle()
327 {
328 for(uint32_t i = 0; i < queueCount; i++)
329 {
330 queues[i].waitIdle();
331 }
332
333 return VK_SUCCESS;
334 }
335
getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateInfo * pCreateInfo,VkDescriptorSetLayoutSupport * pSupport) const336 void Device::getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
337 VkDescriptorSetLayoutSupport *pSupport) const
338 {
339 // From Vulkan Spec 13.2.1 Descriptor Set Layout, in description of vkGetDescriptorSetLayoutSupport:
340 // "This command does not consider other limits such as maxPerStageDescriptor*, and so a descriptor
341 // set layout that is supported according to this command must still satisfy the pipeline layout limits
342 // such as maxPerStageDescriptor* in order to be used in a pipeline layout."
343
344 // We have no "strange" limitations to enforce beyond the device limits, so we can safely always claim support.
345 pSupport->supported = VK_TRUE;
346
347 if(pCreateInfo->bindingCount > 0)
348 {
349 bool hasVariableSizedDescriptor = false;
350
351 const VkBaseInStructure *layoutInfo = reinterpret_cast<const VkBaseInStructure *>(pCreateInfo->pNext);
352 while(layoutInfo && !hasVariableSizedDescriptor)
353 {
354 if(layoutInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO)
355 {
356 const VkDescriptorSetLayoutBindingFlagsCreateInfo *bindingFlagsCreateInfo =
357 reinterpret_cast<const VkDescriptorSetLayoutBindingFlagsCreateInfo *>(layoutInfo);
358
359 for(uint32_t i = 0; i < bindingFlagsCreateInfo->bindingCount; i++)
360 {
361 if(bindingFlagsCreateInfo->pBindingFlags[i] & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)
362 {
363 hasVariableSizedDescriptor = true;
364 break;
365 }
366 }
367 }
368 else
369 {
370 UNSUPPORTED("layoutInfo->sType = %s", vk::Stringify(layoutInfo->sType).c_str());
371 }
372
373 layoutInfo = layoutInfo->pNext;
374 }
375
376 const auto &highestNumberedBinding = pCreateInfo->pBindings[pCreateInfo->bindingCount - 1];
377
378 VkBaseOutStructure *layoutSupport = reinterpret_cast<VkBaseOutStructure *>(pSupport->pNext);
379 while(layoutSupport)
380 {
381 if(layoutSupport->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT)
382 {
383 VkDescriptorSetVariableDescriptorCountLayoutSupport *variableDescriptorCountLayoutSupport =
384 reinterpret_cast<VkDescriptorSetVariableDescriptorCountLayoutSupport *>(layoutSupport);
385
386 // If the VkDescriptorSetLayoutCreateInfo structure does not include a variable-sized descriptor,
387 // [...] then maxVariableDescriptorCount is set to zero.
388 variableDescriptorCountLayoutSupport->maxVariableDescriptorCount =
389 hasVariableSizedDescriptor ? ((highestNumberedBinding.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) ? vk::MAX_INLINE_UNIFORM_BLOCK_SIZE : vk::MAX_UPDATE_AFTER_BIND_DESCRIPTORS) : 0;
390 }
391 else
392 {
393 UNSUPPORTED("layoutSupport->sType = %s", vk::Stringify(layoutSupport->sType).c_str());
394 }
395
396 layoutSupport = layoutSupport->pNext;
397 }
398 }
399 }
400
updateDescriptorSets(uint32_t descriptorWriteCount,const VkWriteDescriptorSet * pDescriptorWrites,uint32_t descriptorCopyCount,const VkCopyDescriptorSet * pDescriptorCopies)401 void Device::updateDescriptorSets(uint32_t descriptorWriteCount, const VkWriteDescriptorSet *pDescriptorWrites,
402 uint32_t descriptorCopyCount, const VkCopyDescriptorSet *pDescriptorCopies)
403 {
404 for(uint32_t i = 0; i < descriptorWriteCount; i++)
405 {
406 DescriptorSetLayout::WriteDescriptorSet(this, pDescriptorWrites[i]);
407 }
408
409 for(uint32_t i = 0; i < descriptorCopyCount; i++)
410 {
411 DescriptorSetLayout::CopyDescriptorSet(pDescriptorCopies[i]);
412 }
413 }
414
getRequirements(VkMemoryDedicatedRequirements * requirements) const415 void Device::getRequirements(VkMemoryDedicatedRequirements *requirements) const
416 {
417 requirements->prefersDedicatedAllocation = VK_FALSE;
418 requirements->requiresDedicatedAllocation = VK_FALSE;
419 }
420
getSamplingRoutineCache() const421 Device::SamplingRoutineCache *Device::getSamplingRoutineCache() const
422 {
423 return samplingRoutineCache.get();
424 }
425
updateSamplingRoutineSnapshotCache()426 void Device::updateSamplingRoutineSnapshotCache()
427 {
428 samplingRoutineCache->updateSnapshot();
429 }
430
indexSampler(const SamplerState & samplerState)431 uint32_t Device::indexSampler(const SamplerState &samplerState)
432 {
433 return samplerIndexer->index(samplerState);
434 }
435
removeSampler(const SamplerState & samplerState)436 void Device::removeSampler(const SamplerState &samplerState)
437 {
438 samplerIndexer->remove(samplerState);
439 }
440
findSampler(uint32_t samplerId) const441 const SamplerState *Device::findSampler(uint32_t samplerId) const
442 {
443 return samplerIndexer->find(samplerId);
444 }
445
// Stub: the name information in pNameInfo is not stored or used.
// Always returns VK_SUCCESS.
VkResult Device::setDebugUtilsObjectName(const VkDebugUtilsObjectNameInfoEXT *pNameInfo)
{
	// Optionally maps user-friendly name to an object
	return VK_SUCCESS;
}
451
// Stub: the tag information in pTagInfo is not stored or used.
// Always returns VK_SUCCESS.
VkResult Device::setDebugUtilsObjectTag(const VkDebugUtilsObjectTagInfoEXT *pTagInfo)
{
	// Optionally attach arbitrary data to an object
	return VK_SUCCESS;
}
457
registerImageView(ImageView * imageView)458 void Device::registerImageView(ImageView *imageView)
459 {
460 if(imageView != nullptr)
461 {
462 marl::lock lock(imageViewSetMutex);
463 imageViewSet.insert(imageView);
464 }
465 }
466
unregisterImageView(ImageView * imageView)467 void Device::unregisterImageView(ImageView *imageView)
468 {
469 if(imageView != nullptr)
470 {
471 marl::lock lock(imageViewSetMutex);
472 auto it = imageViewSet.find(imageView);
473 if(it != imageViewSet.end())
474 {
475 imageViewSet.erase(it);
476 }
477 }
478 }
479
prepareForSampling(ImageView * imageView)480 void Device::prepareForSampling(ImageView *imageView)
481 {
482 if(imageView != nullptr)
483 {
484 marl::lock lock(imageViewSetMutex);
485
486 auto it = imageViewSet.find(imageView);
487 if(it != imageViewSet.end())
488 {
489 imageView->prepareForSampling();
490 }
491 }
492 }
493
contentsChanged(ImageView * imageView,Image::ContentsChangedContext context)494 void Device::contentsChanged(ImageView *imageView, Image::ContentsChangedContext context)
495 {
496 if(imageView != nullptr)
497 {
498 marl::lock lock(imageViewSetMutex);
499
500 auto it = imageViewSet.find(imageView);
501 if(it != imageViewSet.end())
502 {
503 imageView->contentsChanged(context);
504 }
505 }
506 }
507
setPrivateData(VkObjectType objectType,uint64_t objectHandle,const PrivateData * privateDataSlot,uint64_t data)508 VkResult Device::setPrivateData(VkObjectType objectType, uint64_t objectHandle, const PrivateData *privateDataSlot, uint64_t data)
509 {
510 marl::lock lock(privateDataMutex);
511
512 auto &privateDataSlotMap = privateData[privateDataSlot];
513 const PrivateDataObject privateDataObject = { objectType, objectHandle };
514 privateDataSlotMap[privateDataObject] = data;
515 return VK_SUCCESS;
516 }
517
getPrivateData(VkObjectType objectType,uint64_t objectHandle,const PrivateData * privateDataSlot,uint64_t * data)518 void Device::getPrivateData(VkObjectType objectType, uint64_t objectHandle, const PrivateData *privateDataSlot, uint64_t *data)
519 {
520 marl::lock lock(privateDataMutex);
521
522 *data = 0;
523 auto it = privateData.find(privateDataSlot);
524 if(it != privateData.end())
525 {
526 auto &privateDataSlotMap = it->second;
527 const PrivateDataObject privateDataObject = { objectType, objectHandle };
528 auto it2 = privateDataSlotMap.find(privateDataObject);
529 if(it2 != privateDataSlotMap.end())
530 {
531 *data = it2->second;
532 }
533 }
534 }
535
removePrivateDataSlot(const PrivateData * privateDataSlot)536 void Device::removePrivateDataSlot(const PrivateData *privateDataSlot)
537 {
538 marl::lock lock(privateDataMutex);
539
540 privateData.erase(privateDataSlot);
541 }
542
#ifdef SWIFTSHADER_DEVICE_MEMORY_REPORT
// Reports a device memory event to every registered memory report callback.
// The arguments are packed into a VkDeviceMemoryReportCallbackDataEXT, which is
// passed to each callback along with the user data it was registered with.
// Nothing is built when no callback is registered.
void Device::emitDeviceMemoryReport(VkDeviceMemoryReportEventTypeEXT type, uint64_t memoryObjectId, VkDeviceSize size, VkObjectType objectType, uint64_t objectHandle, uint32_t heapIndex)
{
	if(deviceMemoryReportCallbacks.empty())
	{
		return;
	}

	VkDeviceMemoryReportCallbackDataEXT report = {};
	report.sType = VK_STRUCTURE_TYPE_DEVICE_MEMORY_REPORT_CALLBACK_DATA_EXT;
	report.pNext = nullptr;
	report.flags = 0;
	report.type = type;
	report.memoryObjectId = memoryObjectId;
	report.size = size;
	report.objectType = objectType;
	report.objectHandle = objectHandle;
	report.heapIndex = heapIndex;

	const VkDeviceMemoryReportCallbackDataEXT &callbackData = report;
	for(const auto &registered : deviceMemoryReportCallbacks)
	{
		const auto &userCallback = registered.first;
		const auto &userData = registered.second;
		userCallback(&callbackData, userData);
	}
}
#endif  // SWIFTSHADER_DEVICE_MEMORY_REPORT
565
566 } // namespace vk
567