/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/mtl/GrMtlResourceProvider.h"

#include "include/gpu/GrContextOptions.h"
#include "src/gpu/GrContextPriv.h"
#include "src/gpu/mtl/GrMtlCommandBuffer.h"
#include "src/gpu/mtl/GrMtlGpu.h"
#include "src/gpu/mtl/GrMtlPipelineState.h"
#include "src/gpu/mtl/GrMtlUtil.h"

#include "src/sksl/SkSLCompiler.h"

#if !__has_feature(objc_arc)
#error This file must be compiled with ARC. Use the -fobjc-arc flag.
#endif

GrMtlResourceProvider::GrMtlResourceProvider(GrMtlGpu* gpu)
        : fGpu(gpu) {
    fPipelineStateCache.reset(new PipelineStateCache(gpu));
    fBufferSuballocator.reset(new BufferSuballocator(gpu->device(), kBufferSuballocatorStartSize));
    // TODO: maxBufferLength seems like a reasonable metric to determine fBufferSuballocatorMaxSize
    // but may need tuning. Might also need a GrContextOption to let the client set this.
#ifdef SK_BUILD_FOR_MAC
    int64_t maxBufferLength = 1024*1024*1024;
#else
    int64_t maxBufferLength = 256*1024*1024;
#endif
    if (@available(iOS 12, macOS 10.14, *)) {
        maxBufferLength = gpu->device().maxBufferLength;
    }
    fBufferSuballocatorMaxSize = maxBufferLength/16;
}

GrMtlPipelineState* GrMtlResourceProvider::findOrCreateCompatiblePipelineState(
        GrRenderTarget* renderTarget,
        const GrProgramInfo& programInfo) {
    return fPipelineStateCache->refPipelineState(renderTarget, programInfo);
}

////////////////////////////////////////////////////////////////////////////////////////////////

GrMtlDepthStencil* GrMtlResourceProvider::findOrCreateCompatibleDepthStencilState(
        const GrStencilSettings& stencil, GrSurfaceOrigin origin) {
    GrMtlDepthStencil* depthStencilState;
    GrMtlDepthStencil::Key key = GrMtlDepthStencil::GenerateKey(stencil, origin);
    depthStencilState = fDepthStencilStates.find(key);
    if (!depthStencilState) {
        depthStencilState = GrMtlDepthStencil::Create(fGpu, stencil, origin);
        fDepthStencilStates.add(depthStencilState);
    }
    SkASSERT(depthStencilState);
    return depthStencilState;
}

GrMtlSampler* GrMtlResourceProvider::findOrCreateCompatibleSampler(GrSamplerState params) {
    GrMtlSampler* sampler;
    sampler = fSamplers.find(GrMtlSampler::GenerateKey(params));
    if (!sampler) {
        sampler = GrMtlSampler::Create(fGpu, params);
        fSamplers.add(sampler);
    }
    SkASSERT(sampler);
    return sampler;
}

void GrMtlResourceProvider::destroyResources() {
    // Iterate through all stored GrMtlSamplers and unref them before resetting the hash.
    SkTDynamicHash<GrMtlSampler, GrMtlSampler::Key>::Iter samplerIter(&fSamplers);
    for (; !samplerIter.done(); ++samplerIter) {
        (*samplerIter).unref();
    }
    fSamplers.reset();

    // Iterate through all stored GrMtlDepthStencils and unref them before resetting the hash.
    SkTDynamicHash<GrMtlDepthStencil, GrMtlDepthStencil::Key>::Iter dsIter(&fDepthStencilStates);
    for (; !dsIter.done(); ++dsIter) {
        (*dsIter).unref();
    }
    fDepthStencilStates.reset();

    fPipelineStateCache->release();
}

////////////////////////////////////////////////////////////////////////////////////////////////

#ifdef GR_PIPELINE_STATE_CACHE_STATS
// Display pipeline state cache usage
static const bool c_DisplayMtlPipelineCache{false};
#endif

struct GrMtlResourceProvider::PipelineStateCache::Entry {
    Entry(GrMtlGpu* gpu, GrMtlPipelineState* pipelineState)
            : fGpu(gpu)
            , fPipelineState(pipelineState) {}

    GrMtlGpu* fGpu;
    std::unique_ptr<GrMtlPipelineState> fPipelineState;
};

GrMtlResourceProvider::PipelineStateCache::PipelineStateCache(GrMtlGpu* gpu)
        : fMap(gpu->getContext()->priv().options().fRuntimeProgramCacheSize)
        , fGpu(gpu)
#ifdef GR_PIPELINE_STATE_CACHE_STATS
        , fTotalRequests(0)
        , fCacheMisses(0)
#endif
{}

GrMtlResourceProvider::PipelineStateCache::~PipelineStateCache() {
    SkASSERT(0 == fMap.count());
    // dump stats
#ifdef GR_PIPELINE_STATE_CACHE_STATS
    if (c_DisplayMtlPipelineCache) {
        SkDebugf("--- Pipeline State Cache ---\n");
        SkDebugf("Total requests: %d\n", fTotalRequests);
        SkDebugf("Cache misses: %d\n", fCacheMisses);
        SkDebugf("Cache miss %%: %f\n", (fTotalRequests > 0) ?
                 100.f * fCacheMisses / fTotalRequests :
                 0.f);
        SkDebugf("---------------------\n");
    }
#endif
}

void GrMtlResourceProvider::PipelineStateCache::release() {
    fMap.reset();
}

GrMtlPipelineState* GrMtlResourceProvider::PipelineStateCache::refPipelineState(
        GrRenderTarget* renderTarget,
        const GrProgramInfo& programInfo) {
#ifdef GR_PIPELINE_STATE_CACHE_STATS
    ++fTotalRequests;
#endif

    const GrMtlCaps& caps = fGpu->mtlCaps();

    GrProgramDesc desc = caps.makeDesc(renderTarget, programInfo);
    if (!desc.isValid()) {
        GrCapsDebugf(fGpu->caps(), "Failed to build mtl program descriptor!\n");
        return nullptr;
    }

    std::unique_ptr<Entry>* entry = fMap.find(desc);
    if (!entry) {
#ifdef GR_PIPELINE_STATE_CACHE_STATS
        ++fCacheMisses;
#endif
        GrMtlPipelineState* pipelineState(GrMtlPipelineStateBuilder::CreatePipelineState(
                fGpu, renderTarget, desc, programInfo));
        if (!pipelineState) {
            return nullptr;
        }
        entry = fMap.insert(desc, std::unique_ptr<Entry>(new Entry(fGpu, pipelineState)));
        return (*entry)->fPipelineState.get();
    }
    return (*entry)->fPipelineState.get();
}

////////////////////////////////////////////////////////////////////////////////////////////////

static id<MTLBuffer> alloc_dynamic_buffer(id<MTLDevice> device, size_t size) {
    NSUInteger options = 0;
    if (@available(macOS 10.11, iOS 9.0, *)) {
#ifdef SK_BUILD_FOR_MAC
        options |= MTLResourceStorageModeManaged;
#else
        options |= MTLResourceStorageModeShared;
#endif
    }
    return [device newBufferWithLength: size
                               options: options];
}

// The idea here is that we create a ring buffer which is used for all dynamic allocations
// below a certain size. When a dynamic GrMtlBuffer is mapped, it grabs a portion of this
// buffer and uses it. On a subsequent map it will grab a different portion of the buffer.
// This prevents the buffer from overwriting itself before it's submitted to the command
// stream.
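//
// A worked example of the bookkeeping (the numbers here are purely illustrative, not
// Skia's actual defaults): assume fTotalSize is 1024.
//   - getAllocation(100, &offset) returns offset 0 and bumps fHead to 112
//     (100 rounded up to the 16-byte alignment used below).
//   - A second getAllocation(200, &offset) returns offset 112 and bumps fHead to 320.
//   - When the command buffer that consumed those suballocations completes, the
//     completion handler sets fTail to the fHead value captured at submit time (320),
//     making that range available for reuse.
//   - Since fHead and fTail grow without bound and fTotalSize is a power of two,
//     masking with (fTotalSize - 1) recovers the wrapped offsets; equal wrapped
//     offsets with head != tail means the buffer is completely full.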

GrMtlResourceProvider::BufferSuballocator::BufferSuballocator(id<MTLDevice> device, size_t size)
        : fBuffer(alloc_dynamic_buffer(device, size))
        , fTotalSize(size)
        , fHead(0)
        , fTail(0) {
    // We increment fHead and fTail without bound and let overflow handle any wrapping.
    // Because of this, size needs to be a power of two.
    SkASSERT(SkIsPow2(size));
}

id<MTLBuffer> GrMtlResourceProvider::BufferSuballocator::getAllocation(size_t size,
                                                                       size_t* offset) {
    // Capture current state locally (because fTail could be overwritten by the completion
    // handler).
    size_t head, tail;
    SkAutoSpinlock lock(fMutex);
    head = fHead;
    tail = fTail;

    // The head and tail indices increment without bound, wrapping with overflow,
    // so we need to mod them down to the actual bounds of the allocation to determine
    // which blocks are available.
    size_t modHead = head & (fTotalSize - 1);
    size_t modTail = tail & (fTotalSize - 1);

    // If the unwrapped indices differ but wrap to the same position, the in-flight region
    // covers the entire buffer.
    bool full = (head != tail && modHead == modTail);

    // We don't want large allocations to eat up this buffer, so we allocate them separately.
    if (full || size > fTotalSize/2) {
        return nil;
    }

    // case 1: free space lies at the beginning and/or the end of the buffer
    if (modHead >= modTail) {
        // check for room at the end
        if (fTotalSize - modHead < size) {
            // no room at the end, check the beginning
            if (modTail < size) {
                // no room at the beginning
                return nil;
            }
            // we are going to allocate from the beginning, adjust head to '0' position
            head += fTotalSize - modHead;
            modHead = 0;
        }
    // case 2: free space lies in the middle of the buffer, check for room there
    } else if (modTail - modHead < size) {
        // no room in the middle
        return nil;
    }

    *offset = modHead;
    // We're not sure what the usage of the next allocation will be --
    // to be safe we'll use 16 byte alignment.
    fHead = GrAlignTo(head + size, 16);
    return fBuffer;
}

void GrMtlResourceProvider::BufferSuballocator::addCompletionHandler(
        GrMtlCommandBuffer* cmdBuffer) {
    this->ref();
    SkAutoSpinlock lock(fMutex);
    size_t newTail = fHead;
    cmdBuffer->addCompletedHandler(^(id<MTLCommandBuffer> commandBuffer) {
        // Make sure SkAutoSpinlock goes out of scope before
        // the BufferSuballocator is potentially deleted.
        {
            SkAutoSpinlock lock(fMutex);
            fTail = newTail;
        }
        this->unref();
    });
}

id<MTLBuffer> GrMtlResourceProvider::getDynamicBuffer(size_t size, size_t* offset) {
#ifdef SK_BUILD_FOR_MAC
    // Mac requires 4-byte alignment for didModifyRange:
    size = SkAlign4(size);
#endif
    id<MTLBuffer> buffer = fBufferSuballocator->getAllocation(size, offset);
    if (buffer) {
        return buffer;
    }

    // Try to grow allocation (old allocation will age out).
    // We grow up to a maximum size, and only grow if the requested allocation will
    // fit into half of the new buffer (to prevent very large transient buffers forcing
    // growth when they'll never fit anyway).
    if (fBufferSuballocator->size() < fBufferSuballocatorMaxSize &&
        size <= fBufferSuballocator->size()) {
        fBufferSuballocator.reset(new BufferSuballocator(fGpu->device(),
                                                         2*fBufferSuballocator->size()));
        id<MTLBuffer> buffer = fBufferSuballocator->getAllocation(size, offset);
        if (buffer) {
            return buffer;
        }
    }

    *offset = 0;
    return alloc_dynamic_buffer(fGpu->device(), size);
}

void GrMtlResourceProvider::addBufferCompletionHandler(GrMtlCommandBuffer* cmdBuffer) {
    fBufferSuballocator->addCompletionHandler(cmdBuffer);
}
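
// Illustrative usage sketch (hypothetical caller code, not part of this file): a dynamic
// buffer's map path could request a suballocation from the resource provider, copy its data
// in at the returned offset, and rely on the command buffer's completion handler to recycle
// the range. The names below (resourceProvider, commandBuffer, srcData, dataSize) are
// placeholders, not identifiers defined in this file.
//
//     size_t offset;
//     id<MTLBuffer> mtlBuffer = resourceProvider.getDynamicBuffer(dataSize, &offset);
//     if (mtlBuffer) {
//         memcpy(static_cast<char*>(mtlBuffer.contents) + offset, srcData, dataSize);
// #ifdef SK_BUILD_FOR_MAC
//         // Managed storage on Mac needs to be told which range the CPU touched.
//         [mtlBuffer didModifyRange:NSMakeRange(offset, dataSize)];
// #endif
//     }
//     // At submit time, let the suballocator know when the range can be reused:
//     resourceProvider.addBufferCompletionHandler(commandBuffer);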