//
// Copyright 2023 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// ShareGroupVk.cpp:
//    Implements the class methods for ShareGroupVk.
//

#include "libANGLE/renderer/vulkan/ShareGroupVk.h"

#include "common/debug.h"
#include "common/system_utils.h"
#include "libANGLE/Context.h"
#include "libANGLE/Display.h"
#include "libANGLE/renderer/vulkan/BufferVk.h"
#include "libANGLE/renderer/vulkan/ContextVk.h"
#include "libANGLE/renderer/vulkan/DeviceVk.h"
#include "libANGLE/renderer/vulkan/ImageVk.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"
#include "libANGLE/renderer/vulkan/SurfaceVk.h"
#include "libANGLE/renderer/vulkan/SyncVk.h"
#include "libANGLE/renderer/vulkan/TextureVk.h"
#include "libANGLE/renderer/vulkan/VkImageImageSiblingVk.h"

namespace rx
{

namespace
{
// For DescriptorSetUpdates
constexpr size_t kDescriptorBufferInfosInitialSize = 8;
constexpr size_t kDescriptorImageInfosInitialSize  = 4;
constexpr size_t kDescriptorWriteInfosInitialSize =
    kDescriptorBufferInfosInitialSize + kDescriptorImageInfosInitialSize;
constexpr size_t kDescriptorBufferViewsInitialSize = 0;

constexpr VkDeviceSize kMaxStaticBufferSizeToUseBuddyAlgorithm  = 256;
constexpr VkDeviceSize kMaxDynamicBufferSizeToUseBuddyAlgorithm = 4096;

// How often monolithic pipelines should be created, if preferMonolithicPipelinesOverLibraries is
// enabled.  Pipeline creation is typically O(hundreds of microseconds).  A value of 2ms is chosen
// arbitrarily; it ensures that there is always at most a single pipeline job in progress, while
// maintaining a high throughput of 500 pipelines / second for heavier applications.
constexpr double kMonolithicPipelineJobPeriod = 0.002;

// Time interval in seconds at which we should try to prune the default buffer pools.
constexpr double kTimeElapsedForPruneDefaultBufferPool = 0.25;

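// Helper used by the asserts below: returns true only if every context in the share group has the
// given (valid) priority.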
bool ValidateIdenticalPriority(const egl::ContextMap &contexts, egl::ContextPriority sharedPriority)
{
    if (sharedPriority == egl::ContextPriority::InvalidEnum)
    {
        return false;
    }

    for (auto context : contexts)
    {
        const ContextVk *contextVk = vk::GetImpl(context.second);
        if (contextVk->getPriority() != sharedPriority)
        {
            return false;
        }
    }

    return true;
}
}  // namespace

// Setting this to 1 will log buffer pool stats to the INFO stream.
#define ANGLE_ENABLE_BUFFER_POOL_STATS_LOGGING 0

ShareGroupVk::ShareGroupVk(const egl::ShareGroupState &state)
    : ShareGroupImpl(state),
      mContextsPriority(egl::ContextPriority::InvalidEnum),
      mIsContextsPriorityLocked(false),
      mLastMonolithicPipelineJobTime(0)
{
    mLastPruneTime = angle::GetCurrentSystemTime();
    mSizeLimitForBuddyAlgorithm[BufferUsageType::Dynamic] =
        kMaxDynamicBufferSizeToUseBuddyAlgorithm;
    mSizeLimitForBuddyAlgorithm[BufferUsageType::Static] = kMaxStaticBufferSizeToUseBuddyAlgorithm;
}

void ShareGroupVk::onContextAdd()
{
    ASSERT(ValidateIdenticalPriority(getContexts(), mContextsPriority));
}

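// Contexts in a share group must agree on a single priority.  When a context with a higher
// priority joins the group, the existing contexts are migrated to that priority (see
// updateContextsPriority below); otherwise the new context simply adopts the group's current
// priority.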
angle::Result ShareGroupVk::unifyContextsPriority(ContextVk *newContextVk)
{
    const egl::ContextPriority newContextPriority = newContextVk->getPriority();
    ASSERT(newContextPriority != egl::ContextPriority::InvalidEnum);

    if (mContextsPriority == egl::ContextPriority::InvalidEnum)
    {
        ASSERT(!mIsContextsPriorityLocked);
        ASSERT(getContexts().empty());
        mContextsPriority = newContextPriority;
        return angle::Result::Continue;
    }

    static_assert(egl::ContextPriority::Low < egl::ContextPriority::Medium);
    static_assert(egl::ContextPriority::Medium < egl::ContextPriority::High);
    if (mContextsPriority >= newContextPriority || mIsContextsPriorityLocked)
    {
        newContextVk->setPriority(mContextsPriority);
        return angle::Result::Continue;
    }

    ANGLE_TRY(updateContextsPriority(newContextVk, newContextPriority));

    return angle::Result::Continue;
}

angle::Result ShareGroupVk::lockDefaultContextsPriority(ContextVk *contextVk)
{
    constexpr egl::ContextPriority kDefaultPriority = egl::ContextPriority::Medium;
    if (!mIsContextsPriorityLocked)
    {
        if (mContextsPriority != kDefaultPriority)
        {
            ANGLE_TRY(updateContextsPriority(contextVk, kDefaultPriority));
        }
        mIsContextsPriorityLocked = true;
    }
    ASSERT(mContextsPriority == kDefaultPriority);
    return angle::Result::Continue;
}

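// Switches every context in the share group to |newPriority|.  Before the switch, a priority
// dependency covering all protection types in use is submitted through the renderer under a
// scoped queue serial index.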
angle::Result ShareGroupVk::updateContextsPriority(ContextVk *contextVk,
                                                   egl::ContextPriority newPriority)
{
    ASSERT(!mIsContextsPriorityLocked);
    ASSERT(newPriority != egl::ContextPriority::InvalidEnum);
    ASSERT(newPriority != mContextsPriority);
    if (mContextsPriority == egl::ContextPriority::InvalidEnum)
    {
        ASSERT(getContexts().empty());
        mContextsPriority = newPriority;
        return angle::Result::Continue;
    }

    vk::ProtectionTypes protectionTypes;
    protectionTypes.set(contextVk->getProtectionType());
    for (auto context : getContexts())
    {
        protectionTypes.set(vk::GetImpl(context.second)->getProtectionType());
    }

    {
        vk::ScopedQueueSerialIndex index;
        RendererVk *renderer = contextVk->getRenderer();
        ANGLE_TRY(renderer->allocateScopedQueueSerialIndex(&index));
        ANGLE_TRY(renderer->submitPriorityDependency(contextVk, protectionTypes, mContextsPriority,
                                                     newPriority, index.get()));
    }

    for (auto context : getContexts())
    {
        ContextVk *sharedContextVk = vk::GetImpl(context.second);

        ASSERT(sharedContextVk->getPriority() == mContextsPriority);
        sharedContextVk->setPriority(newPriority);
    }
    mContextsPriority = newPriority;

    return angle::Result::Continue;
}

void ShareGroupVk::onDestroy(const egl::Display *display)
{
    RendererVk *renderer = vk::GetImpl(display)->getRenderer();

    for (vk::BufferPoolPointerArray &array : mDefaultBufferPools)
    {
        for (std::unique_ptr<vk::BufferPool> &pool : array)
        {
            if (pool)
            {
                // If any context uses the display texture share group, a BufferBlock may still be
                // in use by textures that outlived the ShareGroup.  Such non-empty BufferBlocks
                // are put into RendererVk's orphan list instead.
                pool->destroy(renderer, mState.hasAnyContextWithDisplayTextureShareGroup());
            }
        }
    }

    mPipelineLayoutCache.destroy(renderer);
    mDescriptorSetLayoutCache.destroy(renderer);

    mMetaDescriptorPools[DescriptorSetIndex::UniformsAndXfb].destroy(renderer);
    mMetaDescriptorPools[DescriptorSetIndex::Texture].destroy(renderer);
    mMetaDescriptorPools[DescriptorSetIndex::ShaderResource].destroy(renderer);

    mFramebufferCache.destroy(renderer);
    resetPrevTexture();
}

angle::Result ShareGroupVk::onMutableTextureUpload(ContextVk *contextVk, TextureVk *newTexture)
{
    return mTextureUpload.onMutableTextureUpload(contextVk, newTexture);
}

void ShareGroupVk::onTextureRelease(TextureVk *textureVk)
{
    mTextureUpload.onTextureRelease(textureVk);
}

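// Schedules a worker-thread task that creates a monolithic pipeline to eventually replace one
// built from pipeline libraries.  Posting is throttled to a single outstanding task and to at
// most one job per kMonolithicPipelineJobPeriod.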
angle::Result ShareGroupVk::scheduleMonolithicPipelineCreationTask(
    ContextVk *contextVk,
    vk::WaitableMonolithicPipelineCreationTask *taskOut)
{
    ASSERT(contextVk->getFeatures().preferMonolithicPipelinesOverLibraries.enabled);

    // Limit to a single task to avoid hogging all the cores.
    if (mMonolithicPipelineCreationEvent && !mMonolithicPipelineCreationEvent->isReady())
    {
        return angle::Result::Continue;
    }

    // Additionally, rate limit the job postings.
    double currentTime = angle::GetCurrentSystemTime();
    if (currentTime - mLastMonolithicPipelineJobTime < kMonolithicPipelineJobPeriod)
    {
        return angle::Result::Continue;
    }

    mLastMonolithicPipelineJobTime = currentTime;

    const vk::RenderPass *compatibleRenderPass = nullptr;
    // Pull in a compatible RenderPass to be used by the task.  This is done at the last minute,
    // just before the task is scheduled, to minimize the time this reference to the render pass
    // cache is held.  If the render pass cache needs to be cleared, the main thread will wait for
    // the job to complete.
    ANGLE_TRY(contextVk->getCompatibleRenderPass(taskOut->getTask()->getRenderPassDesc(),
                                                 &compatibleRenderPass));
    taskOut->setRenderPass(compatibleRenderPass);

    egl::Display *display = contextVk->getRenderer()->getDisplay();
    mMonolithicPipelineCreationEvent =
        display->getMultiThreadPool()->postWorkerTask(taskOut->getTask());

    taskOut->onSchedule(mMonolithicPipelineCreationEvent);

    return angle::Result::Continue;
}

void ShareGroupVk::waitForCurrentMonolithicPipelineCreationTask()
{
    if (mMonolithicPipelineCreationEvent)
    {
        mMonolithicPipelineCreationEvent->wait();
    }
}

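// TextureUpload tracks the most recently specified mutable texture so that, once the application
// switches to uploading a different texture, the previous one can be flushed (its image fully
// initialized) ahead of the draw that would otherwise trigger it.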
angle::Result TextureUpload::onMutableTextureUpload(ContextVk *contextVk, TextureVk *newTexture)
{
    // This feature is currently disabled in the case of display-level texture sharing.
    ASSERT(!contextVk->hasDisplayTextureShareGroup());

    // If the previous texture is null, it should be set to the current texture. We also have to
    // make sure that the previous texture pointer is still a mutable texture. Otherwise, we skip
    // the optimization.
    if (mPrevUploadedMutableTexture == nullptr || mPrevUploadedMutableTexture->isImmutable())
    {
        mPrevUploadedMutableTexture = newTexture;
        return angle::Result::Continue;
    }

    // Skip the optimization if we have not switched to a new texture yet.
    if (mPrevUploadedMutableTexture == newTexture)
    {
        return angle::Result::Continue;
    }

    // If the mutable texture is consistently specified, we initialize a full mip chain for it.
    if (mPrevUploadedMutableTexture->isMutableTextureConsistentlySpecifiedForFlush())
    {
        ANGLE_TRY(mPrevUploadedMutableTexture->ensureImageInitialized(
            contextVk, ImageMipLevels::FullMipChain));
        contextVk->getPerfCounters().mutableTexturesUploaded++;
    }

    // Update the mutable texture pointer with the new pointer for the next potential flush.
    mPrevUploadedMutableTexture = newTexture;

    return angle::Result::Continue;
}

void TextureUpload::onTextureRelease(TextureVk *textureVk)
{
    if (mPrevUploadedMutableTexture == textureVk)
    {
        resetPrevTexture();
    }
}

// UpdateDescriptorSetsBuilder implementation.
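//
// The builder accumulates VkWriteDescriptorSet entries (and the VkDescriptorBufferInfo /
// VkDescriptorImageInfo / VkBufferView storage they point into) and then applies them all in a
// single vkUpdateDescriptorSets call.  Illustrative sketch of the pattern (hypothetical call
// site, not code from this file):
//
//   VkWriteDescriptorSet *write       = builder.allocWriteDescriptorSets(1);
//   VkDescriptorBufferInfo *bufferInfo = builder.allocDescriptorBufferInfos(1);
//   // ... fill in *write and *bufferInfo, with write->pBufferInfo = bufferInfo ...
//   builder.flushDescriptorSetUpdates(device);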
UpdateDescriptorSetsBuilder::UpdateDescriptorSetsBuilder()
{
    // Reserve a reasonable amount of space so that for the majority of apps we never need to
    // grow at all.
    mDescriptorBufferInfos.reserve(kDescriptorBufferInfosInitialSize);
    mDescriptorImageInfos.reserve(kDescriptorImageInfosInitialSize);
    mWriteDescriptorSets.reserve(kDescriptorWriteInfosInitialSize);
    mBufferViews.reserve(kDescriptorBufferViewsInitialSize);
}

UpdateDescriptorSetsBuilder::~UpdateDescriptorSetsBuilder() = default;

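// VkWriteDescriptorSet entries hold raw pointers into mDescriptorBufferInfos,
// mDescriptorImageInfos and mBufferViews.  If one of those vectors reallocates, the pointers
// already recorded in mWriteDescriptorSets would dangle, so growDescriptorCapacity re-bases them
// onto the new storage.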
template <typename T, const T *VkWriteDescriptorSet::*pInfo>
void UpdateDescriptorSetsBuilder::growDescriptorCapacity(std::vector<T> *descriptorVector,
                                                         size_t newSize)
{
    const T *const oldInfoStart = descriptorVector->empty() ? nullptr : &(*descriptorVector)[0];
    size_t newCapacity          = std::max(descriptorVector->capacity() << 1, newSize);
    descriptorVector->reserve(newCapacity);

    if (oldInfoStart)
    {
        // Patch mWriteDescriptorSets with the new BufferInfo/ImageInfo pointers.
        for (VkWriteDescriptorSet &set : mWriteDescriptorSets)
        {
            if (set.*pInfo)
            {
                size_t index = set.*pInfo - oldInfoStart;
                set.*pInfo   = &(*descriptorVector)[index];
            }
        }
    }
}

template <typename T, const T *VkWriteDescriptorSet::*pInfo>
T *UpdateDescriptorSetsBuilder::allocDescriptorInfos(std::vector<T> *descriptorVector, size_t count)
{
    size_t oldSize = descriptorVector->size();
    size_t newSize = oldSize + count;
    if (newSize > descriptorVector->capacity())
    {
        // If we have reached capacity, grow the storage and patch the descriptor sets with the
        // new info pointers.
        growDescriptorCapacity<T, pInfo>(descriptorVector, newSize);
    }
    descriptorVector->resize(newSize);
    return &(*descriptorVector)[oldSize];
}

VkDescriptorBufferInfo *UpdateDescriptorSetsBuilder::allocDescriptorBufferInfos(size_t count)
{
    return allocDescriptorInfos<VkDescriptorBufferInfo, &VkWriteDescriptorSet::pBufferInfo>(
        &mDescriptorBufferInfos, count);
}

VkDescriptorImageInfo *UpdateDescriptorSetsBuilder::allocDescriptorImageInfos(size_t count)
{
    return allocDescriptorInfos<VkDescriptorImageInfo, &VkWriteDescriptorSet::pImageInfo>(
        &mDescriptorImageInfos, count);
}

VkWriteDescriptorSet *UpdateDescriptorSetsBuilder::allocWriteDescriptorSets(size_t count)
{
    size_t oldSize = mWriteDescriptorSets.size();
    size_t newSize = oldSize + count;
    mWriteDescriptorSets.resize(newSize);
    return &mWriteDescriptorSets[oldSize];
}

VkBufferView *UpdateDescriptorSetsBuilder::allocBufferViews(size_t count)
{
    return allocDescriptorInfos<VkBufferView, &VkWriteDescriptorSet::pTexelBufferView>(
        &mBufferViews, count);
}

uint32_t UpdateDescriptorSetsBuilder::flushDescriptorSetUpdates(VkDevice device)
{
    if (mWriteDescriptorSets.empty())
    {
        ASSERT(mDescriptorBufferInfos.empty());
        ASSERT(mDescriptorImageInfos.empty());
        return 0;
    }

    vkUpdateDescriptorSets(device, static_cast<uint32_t>(mWriteDescriptorSets.size()),
                           mWriteDescriptorSets.data(), 0, nullptr);

    uint32_t retVal = static_cast<uint32_t>(mWriteDescriptorSets.size());

    mWriteDescriptorSets.clear();
    mDescriptorBufferInfos.clear();
    mDescriptorImageInfos.clear();
    mBufferViews.clear();

    return retVal;
}

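// Returns (lazily creating) the default BufferPool for the given memory type and usage.  With the
// constants above, for example, a 200-byte static buffer or a 4096-byte dynamic buffer is served
// by a buddy-algorithm pool, while larger allocations fall back to the general algorithm.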
vk::BufferPool *ShareGroupVk::getDefaultBufferPool(RendererVk *renderer,
                                                   VkDeviceSize size,
                                                   uint32_t memoryTypeIndex,
                                                   BufferUsageType usageType)
{
    // First pick the allocation algorithm.  The buddy algorithm is faster but wastes more memory
    // due to power-of-two alignment.  For smaller allocations we always use the buddy algorithm,
    // since aligning to a power of two does not waste too much memory.  For dynamic usage, the
    // size threshold for the buddy algorithm is relaxed since performance is more important.
    SuballocationAlgorithm algorithm = size <= mSizeLimitForBuddyAlgorithm[usageType]
                                           ? SuballocationAlgorithm::Buddy
                                           : SuballocationAlgorithm::General;

    if (!mDefaultBufferPools[algorithm][memoryTypeIndex])
    {
        const vk::Allocator &allocator = renderer->getAllocator();
        VkBufferUsageFlags usageFlags  = GetDefaultBufferUsageFlags(renderer);

        VkMemoryPropertyFlags memoryPropertyFlags;
        allocator.getMemoryTypeProperties(memoryTypeIndex, &memoryPropertyFlags);

        std::unique_ptr<vk::BufferPool> pool  = std::make_unique<vk::BufferPool>();
        vma::VirtualBlockCreateFlags vmaFlags = algorithm == SuballocationAlgorithm::Buddy
                                                    ? vma::VirtualBlockCreateFlagBits::BUDDY
                                                    : vma::VirtualBlockCreateFlagBits::GENERAL;
        pool->initWithFlags(renderer, vmaFlags, usageFlags, 0, memoryTypeIndex,
                            memoryPropertyFlags);
        mDefaultBufferPools[algorithm][memoryTypeIndex] = std::move(pool);
    }

    return mDefaultBufferPools[algorithm][memoryTypeIndex].get();
}

void ShareGroupVk::pruneDefaultBufferPools(RendererVk *renderer)
{
    mLastPruneTime = angle::GetCurrentSystemTime();

    // Bail out if no suballocations have been destroyed since the last prune.
    if (renderer->getSuballocationDestroyedSize() == 0)
    {
        return;
    }

    for (vk::BufferPoolPointerArray &array : mDefaultBufferPools)
    {
        for (std::unique_ptr<vk::BufferPool> &pool : array)
        {
            if (pool)
            {
                pool->pruneEmptyBuffers(renderer);
            }
        }
    }

    renderer->onBufferPoolPrune();

#if ANGLE_ENABLE_BUFFER_POOL_STATS_LOGGING
    logBufferPools();
#endif
}

bool ShareGroupVk::isDueForBufferPoolPrune(RendererVk *renderer)
{
    // Ensure we periodically prune to maintain the heuristic information
    double timeElapsed = angle::GetCurrentSystemTime() - mLastPruneTime;
    if (timeElapsed > kTimeElapsedForPruneDefaultBufferPool)
    {
        return true;
    }

    // If we have destroyed a lot of memory, also prune to ensure memory gets freed as soon as
    // possible
    if (renderer->getSuballocationDestroyedSize() >= kMaxTotalEmptyBufferBytes)
    {
        return true;
    }

    return false;
}

void ShareGroupVk::calculateTotalBufferCount(size_t *bufferCount, VkDeviceSize *totalSize) const
{
    *bufferCount = 0;
    *totalSize   = 0;
    for (const vk::BufferPoolPointerArray &array : mDefaultBufferPools)
    {
        for (const std::unique_ptr<vk::BufferPool> &pool : array)
        {
            if (pool)
            {
                *bufferCount += pool->getBufferCount();
                *totalSize += pool->getMemorySize();
            }
        }
    }
}

void ShareGroupVk::logBufferPools() const
{
    size_t totalBufferCount;
    VkDeviceSize totalMemorySize;
    calculateTotalBufferCount(&totalBufferCount, &totalMemorySize);

    INFO() << "BufferBlocks count:" << totalBufferCount << " memorySize:" << totalMemorySize / 1024
           << " UnusedBytes/memorySize (KBs):";
    for (const vk::BufferPoolPointerArray &array : mDefaultBufferPools)
    {
        for (const std::unique_ptr<vk::BufferPool> &pool : array)
        {
            if (pool && pool->getBufferCount() > 0)
            {
                std::ostringstream log;
                pool->addStats(&log);
                INFO() << "\t" << log.str();
            }
        }
    }
}
}  // namespace rx