//
// Copyright 2023 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// ShareGroupVk.cpp:
//    Implements the class methods for ShareGroupVk.
//

#include "libANGLE/renderer/vulkan/ShareGroupVk.h"

#include "common/debug.h"
#include "common/system_utils.h"
#include "libANGLE/Context.h"
#include "libANGLE/Display.h"
#include "libANGLE/renderer/vulkan/BufferVk.h"
#include "libANGLE/renderer/vulkan/ContextVk.h"
#include "libANGLE/renderer/vulkan/DeviceVk.h"
#include "libANGLE/renderer/vulkan/ImageVk.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"
#include "libANGLE/renderer/vulkan/SurfaceVk.h"
#include "libANGLE/renderer/vulkan/SyncVk.h"
#include "libANGLE/renderer/vulkan/TextureVk.h"
#include "libANGLE/renderer/vulkan/VkImageImageSiblingVk.h"

namespace rx
{

namespace
{
// For DescriptorSetUpdates
constexpr size_t kDescriptorBufferInfosInitialSize = 8;
constexpr size_t kDescriptorImageInfosInitialSize = 4;
constexpr size_t kDescriptorWriteInfosInitialSize =
    kDescriptorBufferInfosInitialSize + kDescriptorImageInfosInitialSize;
constexpr size_t kDescriptorBufferViewsInitialSize = 0;

constexpr VkDeviceSize kMaxStaticBufferSizeToUseBuddyAlgorithm = 256;
constexpr VkDeviceSize kMaxDynamicBufferSizeToUseBuddyAlgorithm = 4096;

// How often monolithic pipelines should be created, if preferMonolithicPipelinesOverLibraries is
// enabled.  Pipeline creation is typically O(hundreds of microseconds).  A value of 2ms is chosen
// arbitrarily; it ensures that there is always at most a single pipeline job in progress, while
// maintaining a high throughput of 500 pipelines / second for heavier applications.
constexpr double kMonolithicPipelineJobPeriod = 0.002;

// Time interval in seconds at which we should try to prune default buffer pools.
constexpr double kTimeElapsedForPruneDefaultBufferPool = 0.25;

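// Helper used in asserts below: returns true only if |sharedPriority| is a valid priority and
// every context in |contexts| reports that same priority.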
bool ValidateIdenticalPriority(const egl::ContextMap &contexts, egl::ContextPriority sharedPriority)
{
    if (sharedPriority == egl::ContextPriority::InvalidEnum)
    {
        return false;
    }

    for (auto context : contexts)
    {
        const ContextVk *contextVk = vk::GetImpl(context.second);
        if (contextVk->getPriority() != sharedPriority)
        {
            return false;
        }
    }

    return true;
}
}  // namespace

// When set to 1, buffer pool stats are logged to the INFO stream.
#define ANGLE_ENABLE_BUFFER_POOL_STATS_LOGGING 0

ShareGroupVk::ShareGroupVk(const egl::ShareGroupState &state)
    : ShareGroupImpl(state),
      mContextsPriority(egl::ContextPriority::InvalidEnum),
      mIsContextsPriorityLocked(false),
      mLastMonolithicPipelineJobTime(0)
{
    mLastPruneTime = angle::GetCurrentSystemTime();
    mSizeLimitForBuddyAlgorithm[BufferUsageType::Dynamic] =
        kMaxDynamicBufferSizeToUseBuddyAlgorithm;
    mSizeLimitForBuddyAlgorithm[BufferUsageType::Static] = kMaxStaticBufferSizeToUseBuddyAlgorithm;
}

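// Debug check: every context in the share group is expected to already share mContextsPriority
// (see unifyContextsPriority()).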
void ShareGroupVk::onContextAdd()
{
    ASSERT(ValidateIdenticalPriority(getContexts(), mContextsPriority));
}

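// Makes a new context's priority consistent with the rest of the share group.  The first context
// simply establishes the group priority.  After that, if the new context's priority is lower or
// equal, or the group priority is locked, the new context is switched to the group priority;
// otherwise the whole group is promoted to the new (higher) priority.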
angle::Result ShareGroupVk::unifyContextsPriority(ContextVk *newContextVk)
{
    const egl::ContextPriority newContextPriority = newContextVk->getPriority();
    ASSERT(newContextPriority != egl::ContextPriority::InvalidEnum);

    if (mContextsPriority == egl::ContextPriority::InvalidEnum)
    {
        ASSERT(!mIsContextsPriorityLocked);
        ASSERT(getContexts().empty());
        mContextsPriority = newContextPriority;
        return angle::Result::Continue;
    }

    static_assert(egl::ContextPriority::Low < egl::ContextPriority::Medium);
    static_assert(egl::ContextPriority::Medium < egl::ContextPriority::High);
    if (mContextsPriority >= newContextPriority || mIsContextsPriorityLocked)
    {
        newContextVk->setPriority(mContextsPriority);
        return angle::Result::Continue;
    }

    ANGLE_TRY(updateContextsPriority(newContextVk, newContextPriority));

    return angle::Result::Continue;
}

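// Pins the share group to the default (Medium) priority.  Once locked, unifyContextsPriority()
// never raises the group priority again; new contexts simply inherit the locked value.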
angle::Result ShareGroupVk::lockDefaultContextsPriority(ContextVk *contextVk)
{
    constexpr egl::ContextPriority kDefaultPriority = egl::ContextPriority::Medium;
    if (!mIsContextsPriorityLocked)
    {
        if (mContextsPriority != kDefaultPriority)
        {
            ANGLE_TRY(updateContextsPriority(contextVk, kDefaultPriority));
        }
        mIsContextsPriorityLocked = true;
    }
    ASSERT(mContextsPriority == kDefaultPriority);
    return angle::Result::Continue;
}

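// Switches every context in the share group, and the group itself, to |newPriority|.  Before the
// switch, a priority dependency covering all protection types in use is submitted through the
// renderer.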
angle::Result ShareGroupVk::updateContextsPriority(ContextVk *contextVk,
                                                   egl::ContextPriority newPriority)
{
    ASSERT(!mIsContextsPriorityLocked);
    ASSERT(newPriority != egl::ContextPriority::InvalidEnum);
    ASSERT(newPriority != mContextsPriority);
    if (mContextsPriority == egl::ContextPriority::InvalidEnum)
    {
        ASSERT(getContexts().empty());
        mContextsPriority = newPriority;
        return angle::Result::Continue;
    }

    vk::ProtectionTypes protectionTypes;
    protectionTypes.set(contextVk->getProtectionType());
    for (auto context : getContexts())
    {
        protectionTypes.set(vk::GetImpl(context.second)->getProtectionType());
    }

    {
        vk::ScopedQueueSerialIndex index;
        RendererVk *renderer = contextVk->getRenderer();
        ANGLE_TRY(renderer->allocateScopedQueueSerialIndex(&index));
        ANGLE_TRY(renderer->submitPriorityDependency(contextVk, protectionTypes, mContextsPriority,
                                                     newPriority, index.get()));
    }

    for (auto context : getContexts())
    {
        ContextVk *sharedContextVk = vk::GetImpl(context.second);

        ASSERT(sharedContextVk->getPriority() == mContextsPriority);
        sharedContextVk->setPriority(newPriority);
    }
    mContextsPriority = newPriority;

    return angle::Result::Continue;
}

void ShareGroupVk::onDestroy(const egl::Display *display)
{
    RendererVk *renderer = vk::GetImpl(display)->getRenderer();

    for (vk::BufferPoolPointerArray &array : mDefaultBufferPools)
    {
        for (std::unique_ptr<vk::BufferPool> &pool : array)
        {
            if (pool)
            {
                // If any context uses display texture share group, it is expected that a
                // BufferBlock may still be in use by textures that outlived the ShareGroup.  Such
                // non-empty BufferBlocks are put onto RendererVk's orphan list instead of being
                // destroyed here.
                pool->destroy(renderer, mState.hasAnyContextWithDisplayTextureShareGroup());
            }
        }
    }

    mPipelineLayoutCache.destroy(renderer);
    mDescriptorSetLayoutCache.destroy(renderer);

    mMetaDescriptorPools[DescriptorSetIndex::UniformsAndXfb].destroy(renderer);
    mMetaDescriptorPools[DescriptorSetIndex::Texture].destroy(renderer);
    mMetaDescriptorPools[DescriptorSetIndex::ShaderResource].destroy(renderer);

    mFramebufferCache.destroy(renderer);
    resetPrevTexture();
}

angle::Result ShareGroupVk::onMutableTextureUpload(ContextVk *contextVk, TextureVk *newTexture)
{
    return mTextureUpload.onMutableTextureUpload(contextVk, newTexture);
}

void ShareGroupVk::onTextureRelease(TextureVk *textureVk)
{
    mTextureUpload.onTextureRelease(textureVk);
}

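// Posts the monolithic pipeline creation job in |taskOut| to the worker thread pool, subject to
// two throttles: at most one job in flight at a time, and at most one job per
// kMonolithicPipelineJobPeriod.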
angle::Result ShareGroupVk::scheduleMonolithicPipelineCreationTask(
    ContextVk *contextVk,
    vk::WaitableMonolithicPipelineCreationTask *taskOut)
{
    ASSERT(contextVk->getFeatures().preferMonolithicPipelinesOverLibraries.enabled);

    // Limit to a single task to avoid hogging all the cores.
    if (mMonolithicPipelineCreationEvent && !mMonolithicPipelineCreationEvent->isReady())
    {
        return angle::Result::Continue;
    }

    // Additionally, rate limit the job postings.
    double currentTime = angle::GetCurrentSystemTime();
    if (currentTime - mLastMonolithicPipelineJobTime < kMonolithicPipelineJobPeriod)
    {
        return angle::Result::Continue;
    }

    mLastMonolithicPipelineJobTime = currentTime;

    const vk::RenderPass *compatibleRenderPass = nullptr;
    // Pull in a compatible RenderPass to be used by the task.  This is done at the last minute,
    // just before the task is scheduled, to minimize the time this reference to the render pass
    // cache is held.  If the render pass cache needs to be cleared, the main thread will wait for
    // the job to complete.
    ANGLE_TRY(contextVk->getCompatibleRenderPass(taskOut->getTask()->getRenderPassDesc(),
                                                 &compatibleRenderPass));
    taskOut->setRenderPass(compatibleRenderPass);

    egl::Display *display = contextVk->getRenderer()->getDisplay();
    mMonolithicPipelineCreationEvent =
        display->getMultiThreadPool()->postWorkerTask(taskOut->getTask());

    taskOut->onSchedule(mMonolithicPipelineCreationEvent);

    return angle::Result::Continue;
}

void ShareGroupVk::waitForCurrentMonolithicPipelineCreationTask()
{
    if (mMonolithicPipelineCreationEvent)
    {
        mMonolithicPipelineCreationEvent->wait();
    }
}

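// Tracks the most recently uploaded mutable texture in the share group.  When an upload targets a
// different mutable texture, the previously tracked one gets a full mip chain initialized (if it
// has been consistently specified), and tracking moves on to the new texture.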
angle::Result TextureUpload::onMutableTextureUpload(ContextVk *contextVk, TextureVk *newTexture)
{
    // This feature is currently disabled in the case of display-level texture sharing.
    ASSERT(!contextVk->hasDisplayTextureShareGroup());

    // If the previous texture is null, it should be set to the current texture.  We also have to
    // make sure that the previous texture pointer is still a mutable texture.  Otherwise, we skip
    // the optimization.
    if (mPrevUploadedMutableTexture == nullptr || mPrevUploadedMutableTexture->isImmutable())
    {
        mPrevUploadedMutableTexture = newTexture;
        return angle::Result::Continue;
    }

    // Skip the optimization if we have not switched to a new texture yet.
    if (mPrevUploadedMutableTexture == newTexture)
    {
        return angle::Result::Continue;
    }

    // If the mutable texture is consistently specified, we initialize a full mip chain for it.
    if (mPrevUploadedMutableTexture->isMutableTextureConsistentlySpecifiedForFlush())
    {
        ANGLE_TRY(mPrevUploadedMutableTexture->ensureImageInitialized(
            contextVk, ImageMipLevels::FullMipChain));
        contextVk->getPerfCounters().mutableTexturesUploaded++;
    }

    // Update the mutable texture pointer with the new pointer for the next potential flush.
    mPrevUploadedMutableTexture = newTexture;

    return angle::Result::Continue;
}

void TextureUpload::onTextureRelease(TextureVk *textureVk)
{
    if (mPrevUploadedMutableTexture == textureVk)
    {
        resetPrevTexture();
    }
}

// UpdateDescriptorSetsBuilder implementation.
UpdateDescriptorSetsBuilder::UpdateDescriptorSetsBuilder()
{
    // Reserve a reasonable amount of space so that for the majority of apps we don't need to grow
    // at all.
    mDescriptorBufferInfos.reserve(kDescriptorBufferInfosInitialSize);
    mDescriptorImageInfos.reserve(kDescriptorImageInfosInitialSize);
    mWriteDescriptorSets.reserve(kDescriptorWriteInfosInitialSize);
    mBufferViews.reserve(kDescriptorBufferViewsInitialSize);
}

UpdateDescriptorSetsBuilder::~UpdateDescriptorSetsBuilder() = default;

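// Grows |descriptorVector| to hold at least |newSize| entries, then re-points any |pInfo| pointers
// in mWriteDescriptorSets that referenced the old storage.  For example, a VkWriteDescriptorSet
// that held a pointer to element 3 of the old storage is patched to point at element 3 of the
// reallocated storage.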
template <typename T, const T *VkWriteDescriptorSet::*pInfo>
void UpdateDescriptorSetsBuilder::growDescriptorCapacity(std::vector<T> *descriptorVector,
                                                         size_t newSize)
{
    const T *const oldInfoStart = descriptorVector->empty() ? nullptr : &(*descriptorVector)[0];
    size_t newCapacity = std::max(descriptorVector->capacity() << 1, newSize);
    descriptorVector->reserve(newCapacity);

    if (oldInfoStart)
    {
        // Patch mWriteDescriptorSets with the new BufferInfo/ImageInfo pointers.
        for (VkWriteDescriptorSet &set : mWriteDescriptorSets)
        {
            if (set.*pInfo)
            {
                size_t index = set.*pInfo - oldInfoStart;
                set.*pInfo = &(*descriptorVector)[index];
            }
        }
    }
}

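// Appends |count| default-constructed entries to |descriptorVector| and returns a pointer to the
// first new entry, growing (and pointer-patching) the storage if capacity would be exceeded.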
template <typename T, const T *VkWriteDescriptorSet::*pInfo>
T *UpdateDescriptorSetsBuilder::allocDescriptorInfos(std::vector<T> *descriptorVector, size_t count)
{
    size_t oldSize = descriptorVector->size();
    size_t newSize = oldSize + count;
    if (newSize > descriptorVector->capacity())
    {
        // If we have reached capacity, grow the storage and patch the write descriptor sets with
        // the new info pointers.
        growDescriptorCapacity<T, pInfo>(descriptorVector, newSize);
    }
    descriptorVector->resize(newSize);
    return &(*descriptorVector)[oldSize];
}

VkDescriptorBufferInfo *UpdateDescriptorSetsBuilder::allocDescriptorBufferInfos(size_t count)
{
    return allocDescriptorInfos<VkDescriptorBufferInfo, &VkWriteDescriptorSet::pBufferInfo>(
        &mDescriptorBufferInfos, count);
}

VkDescriptorImageInfo *UpdateDescriptorSetsBuilder::allocDescriptorImageInfos(size_t count)
{
    return allocDescriptorInfos<VkDescriptorImageInfo, &VkWriteDescriptorSet::pImageInfo>(
        &mDescriptorImageInfos, count);
}

VkWriteDescriptorSet *UpdateDescriptorSetsBuilder::allocWriteDescriptorSets(size_t count)
{
    size_t oldSize = mWriteDescriptorSets.size();
    size_t newSize = oldSize + count;
    mWriteDescriptorSets.resize(newSize);
    return &mWriteDescriptorSets[oldSize];
}

VkBufferView *UpdateDescriptorSetsBuilder::allocBufferViews(size_t count)
{
    return allocDescriptorInfos<VkBufferView, &VkWriteDescriptorSet::pTexelBufferView>(
        &mBufferViews, count);
}

uint32_t UpdateDescriptorSetsBuilder::flushDescriptorSetUpdates(VkDevice device)
{
    if (mWriteDescriptorSets.empty())
    {
        ASSERT(mDescriptorBufferInfos.empty());
        ASSERT(mDescriptorImageInfos.empty());
        return 0;
    }

    vkUpdateDescriptorSets(device, static_cast<uint32_t>(mWriteDescriptorSets.size()),
                           mWriteDescriptorSets.data(), 0, nullptr);

    uint32_t retVal = static_cast<uint32_t>(mWriteDescriptorSets.size());

    mWriteDescriptorSets.clear();
    mDescriptorBufferInfos.clear();
    mDescriptorImageInfos.clear();
    mBufferViews.clear();

    return retVal;
}

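// Returns the shared BufferPool for the given memory type and usage, creating it on first use.
// With the current size limits, for example, a 256-byte static allocation or a 4096-byte dynamic
// allocation suballocates with the buddy algorithm, while larger requests fall back to the
// general algorithm.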
vk::BufferPool *ShareGroupVk::getDefaultBufferPool(RendererVk *renderer,
                                                   VkDeviceSize size,
                                                   uint32_t memoryTypeIndex,
                                                   BufferUsageType usageType)
{
    // First pick the allocation algorithm.  The buddy algorithm is faster, but wastes more memory
    // due to power-of-two alignment.  For smaller allocations we always use the buddy algorithm,
    // since aligning to a power of two does not waste too much memory there.  For dynamic usage,
    // the size threshold for the buddy algorithm is relaxed since performance is more important.
    SuballocationAlgorithm algorithm = size <= mSizeLimitForBuddyAlgorithm[usageType]
                                           ? SuballocationAlgorithm::Buddy
                                           : SuballocationAlgorithm::General;

    if (!mDefaultBufferPools[algorithm][memoryTypeIndex])
    {
        const vk::Allocator &allocator = renderer->getAllocator();
        VkBufferUsageFlags usageFlags = GetDefaultBufferUsageFlags(renderer);

        VkMemoryPropertyFlags memoryPropertyFlags;
        allocator.getMemoryTypeProperties(memoryTypeIndex, &memoryPropertyFlags);

        std::unique_ptr<vk::BufferPool> pool = std::make_unique<vk::BufferPool>();
        vma::VirtualBlockCreateFlags vmaFlags = algorithm == SuballocationAlgorithm::Buddy
                                                    ? vma::VirtualBlockCreateFlagBits::BUDDY
                                                    : vma::VirtualBlockCreateFlagBits::GENERAL;
        pool->initWithFlags(renderer, vmaFlags, usageFlags, 0, memoryTypeIndex,
                            memoryPropertyFlags);
        mDefaultBufferPools[algorithm][memoryTypeIndex] = std::move(pool);
    }

    return mDefaultBufferPools[algorithm][memoryTypeIndex].get();
}

void ShareGroupVk::pruneDefaultBufferPools(RendererVk *renderer)
{
    mLastPruneTime = angle::GetCurrentSystemTime();

    // Bail out if no suballocations have been destroyed since the last prune.
    if (renderer->getSuballocationDestroyedSize() == 0)
    {
        return;
    }

    for (vk::BufferPoolPointerArray &array : mDefaultBufferPools)
    {
        for (std::unique_ptr<vk::BufferPool> &pool : array)
        {
            if (pool)
            {
                pool->pruneEmptyBuffers(renderer);
            }
        }
    }

    renderer->onBufferPoolPrune();

#if ANGLE_ENABLE_BUFFER_POOL_STATS_LOGGING
    logBufferPools();
#endif
}

bool ShareGroupVk::isDueForBufferPoolPrune(RendererVk *renderer)
{
    // Ensure we periodically prune to maintain the heuristic information.
    double timeElapsed = angle::GetCurrentSystemTime() - mLastPruneTime;
    if (timeElapsed > kTimeElapsedForPruneDefaultBufferPool)
    {
        return true;
    }

    // If we have destroyed a lot of memory, also prune to ensure memory gets freed as soon as
    // possible.
    if (renderer->getSuballocationDestroyedSize() >= kMaxTotalEmptyBufferBytes)
    {
        return true;
    }

    return false;
}

void ShareGroupVk::calculateTotalBufferCount(size_t *bufferCount, VkDeviceSize *totalSize) const
{
    *bufferCount = 0;
    *totalSize = 0;
    for (const vk::BufferPoolPointerArray &array : mDefaultBufferPools)
    {
        for (const std::unique_ptr<vk::BufferPool> &pool : array)
        {
            if (pool)
            {
                *bufferCount += pool->getBufferCount();
                *totalSize += pool->getMemorySize();
            }
        }
    }
}

void ShareGroupVk::logBufferPools() const
{
    size_t totalBufferCount;
    VkDeviceSize totalMemorySize;
    calculateTotalBufferCount(&totalBufferCount, &totalMemorySize);

    INFO() << "BufferBlocks count:" << totalBufferCount << " memorySize:" << totalMemorySize / 1024
           << " UnusedBytes/memorySize (KBs):";
    for (const vk::BufferPoolPointerArray &array : mDefaultBufferPools)
    {
        for (const std::unique_ptr<vk::BufferPool> &pool : array)
        {
            if (pool && pool->getBufferCount() > 0)
            {
                std::ostringstream log;
                pool->addStats(&log);
                INFO() << "\t" << log.str();
            }
        }
    }
}
}  // namespace rx