/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/mtl/GrMtlResourceProvider.h"

#include "src/gpu/mtl/GrMtlCommandBuffer.h"
#include "src/gpu/mtl/GrMtlGpu.h"
#include "src/gpu/mtl/GrMtlPipelineState.h"
#include "src/gpu/mtl/GrMtlUtil.h"

#include "src/sksl/SkSLCompiler.h"

#if !__has_feature(objc_arc)
#error This file must be compiled with ARC. Use -fobjc-arc flag
#endif

GrMtlResourceProvider::GrMtlResourceProvider(GrMtlGpu* gpu)
        : fGpu(gpu) {
    fPipelineStateCache.reset(new PipelineStateCache(gpu));
    fBufferSuballocator.reset(new BufferSuballocator(gpu->device(), kBufferSuballocatorStartSize));
    // TODO: maxBufferLength seems like a reasonable metric to determine fBufferSuballocatorMaxSize
    // but may need tuning. Might also need a GrContextOption to let the client set this.
#ifdef SK_BUILD_FOR_MAC
    int64_t maxBufferLength = 1024*1024*1024;
#else
    int64_t maxBufferLength = 256*1024*1024;
#endif
#if GR_METAL_SDK_VERSION >= 200
    if ([gpu->device() respondsToSelector:@selector(maxBufferLength)]) {
        maxBufferLength = gpu->device().maxBufferLength;
    }
#endif
    fBufferSuballocatorMaxSize = maxBufferLength/16;
}

GrMtlPipelineState* GrMtlResourceProvider::findOrCreateCompatiblePipelineState(
        GrRenderTarget* renderTarget, GrSurfaceOrigin origin,
        const GrPipeline& pipeline, const GrPrimitiveProcessor& proc,
        const GrTextureProxy* const primProcProxies[], GrPrimitiveType primType) {
    return fPipelineStateCache->refPipelineState(renderTarget, origin, proc, primProcProxies,
                                                 pipeline, primType);
}

////////////////////////////////////////////////////////////////////////////////////////////////

GrMtlDepthStencil* GrMtlResourceProvider::findOrCreateCompatibleDepthStencilState(
        const GrStencilSettings& stencil, GrSurfaceOrigin origin) {
    GrMtlDepthStencil* depthStencilState;
    GrMtlDepthStencil::Key key = GrMtlDepthStencil::GenerateKey(stencil, origin);
    depthStencilState = fDepthStencilStates.find(key);
    if (!depthStencilState) {
        depthStencilState = GrMtlDepthStencil::Create(fGpu, stencil, origin);
        fDepthStencilStates.add(depthStencilState);
    }
    SkASSERT(depthStencilState);
    return depthStencilState;
}

GrMtlSampler* GrMtlResourceProvider::findOrCreateCompatibleSampler(const GrSamplerState& params,
                                                                   uint32_t maxMipLevel) {
    GrMtlSampler* sampler;
    sampler = fSamplers.find(GrMtlSampler::GenerateKey(params, maxMipLevel));
    if (!sampler) {
        sampler = GrMtlSampler::Create(fGpu, params, maxMipLevel);
        fSamplers.add(sampler);
    }
    SkASSERT(sampler);
    return sampler;
}

void GrMtlResourceProvider::destroyResources() {
    // Iterate through all stored GrMtlSamplers and unref them before resetting the hash.
    SkTDynamicHash<GrMtlSampler, GrMtlSampler::Key>::Iter samplerIter(&fSamplers);
    for (; !samplerIter.done(); ++samplerIter) {
        (*samplerIter).unref();
    }
    fSamplers.reset();

    // Iterate through all stored GrMtlDepthStencils and unref them before resetting the hash.
    SkTDynamicHash<GrMtlDepthStencil, GrMtlDepthStencil::Key>::Iter dsIter(&fDepthStencilStates);
    for (; !dsIter.done(); ++dsIter) {
        (*dsIter).unref();
    }
    fDepthStencilStates.reset();

    fPipelineStateCache->release();
}

////////////////////////////////////////////////////////////////////////////////////////////////

#ifdef GR_PIPELINE_STATE_CACHE_STATS
// Display pipeline state cache usage
static const bool c_DisplayMtlPipelineCache{false};
#endif

struct GrMtlResourceProvider::PipelineStateCache::Entry {
    Entry(GrMtlGpu* gpu, GrMtlPipelineState* pipelineState)
            : fGpu(gpu)
            , fPipelineState(pipelineState) {}

    GrMtlGpu* fGpu;
    std::unique_ptr<GrMtlPipelineState> fPipelineState;
};

GrMtlResourceProvider::PipelineStateCache::PipelineStateCache(GrMtlGpu* gpu)
        : fMap(kMaxEntries)
        , fGpu(gpu)
#ifdef GR_PIPELINE_STATE_CACHE_STATS
        , fTotalRequests(0)
        , fCacheMisses(0)
#endif
{}

GrMtlResourceProvider::PipelineStateCache::~PipelineStateCache() {
    SkASSERT(0 == fMap.count());
    // dump stats
#ifdef GR_PIPELINE_STATE_CACHE_STATS
    if (c_DisplayMtlPipelineCache) {
        SkDebugf("--- Pipeline State Cache ---\n");
        SkDebugf("Total requests: %d\n", fTotalRequests);
        SkDebugf("Cache misses: %d\n", fCacheMisses);
        SkDebugf("Cache miss %%: %f\n", (fTotalRequests > 0) ?
                 100.f * fCacheMisses / fTotalRequests :
                 0.f);
        SkDebugf("---------------------\n");
    }
#endif
}

void GrMtlResourceProvider::PipelineStateCache::release() {
    fMap.reset();
}

GrMtlPipelineState* GrMtlResourceProvider::PipelineStateCache::refPipelineState(
        GrRenderTarget* renderTarget,
        GrSurfaceOrigin origin,
        const GrPrimitiveProcessor& primProc,
        const GrTextureProxy* const primProcProxies[],
        const GrPipeline& pipeline,
        GrPrimitiveType primType) {
#ifdef GR_PIPELINE_STATE_CACHE_STATS
    ++fTotalRequests;
#endif
    // Get GrMtlProgramDesc
    GrMtlPipelineStateBuilder::Desc desc;
    if (!GrMtlPipelineStateBuilder::Desc::Build(&desc, renderTarget, primProc, pipeline, primType,
                                                fGpu)) {
        GrCapsDebugf(fGpu->caps(), "Failed to build mtl program descriptor!\n");
        return nullptr;
    }
    // If we knew the shader won't depend on origin, we could skip this (and use the same program
    // for both origins). Instrumenting all fragment processors would be difficult and error prone.
    desc.setSurfaceOriginKey(GrGLSLFragmentShaderBuilder::KeyForSurfaceOrigin(origin));

    std::unique_ptr<Entry>* entry = fMap.find(desc);
    if (!entry) {
#ifdef GR_PIPELINE_STATE_CACHE_STATS
        ++fCacheMisses;
#endif
        GrMtlPipelineState* pipelineState(GrMtlPipelineStateBuilder::CreatePipelineState(
                fGpu, renderTarget, origin, primProc, primProcProxies, pipeline, &desc));
        if (nullptr == pipelineState) {
            return nullptr;
        }
        entry = fMap.insert(desc, std::unique_ptr<Entry>(new Entry(fGpu, pipelineState)));
        return (*entry)->fPipelineState.get();
    }
    return (*entry)->fPipelineState.get();
}

////////////////////////////////////////////////////////////////////////////////////////////////

static id<MTLBuffer> alloc_dynamic_buffer(id<MTLDevice> device, size_t size) {
    return [device newBufferWithLength: size
#ifdef SK_BUILD_FOR_MAC
                               options: MTLResourceStorageModeManaged];
#else
                               options: MTLResourceStorageModeShared];
#endif
}

// The idea here is that we create a ring buffer which is used for all dynamic allocations
// below a certain size. When a dynamic GrMtlBuffer is mapped, it grabs a portion of this
// buffer and uses it. On a subsequent map it will grab a different portion of the buffer.
// This prevents the buffer from overwriting itself before it's submitted to the command
// stream.

GrMtlResourceProvider::BufferSuballocator::BufferSuballocator(id<MTLDevice> device, size_t size)
        : fBuffer(alloc_dynamic_buffer(device, size))
        , fTotalSize(size)
        , fHead(0)
        , fTail(0) {
    // We increment fHead and fTail without bound and let overflow handle any wrapping.
    // Because of this, size needs to be a power of two.
    SkASSERT(SkIsPow2(size));
}

id<MTLBuffer> GrMtlResourceProvider::BufferSuballocator::getAllocation(size_t size,
                                                                       size_t* offset) {
    // capture current state locally (because fTail could be overwritten by the completion handler)
    size_t head, tail;
    SkAutoSpinlock lock(fMutex);
    head = fHead;
    tail = fTail;

    // The head and tail indices increment without bound, wrapping with overflow,
    // so we need to mod them down to the actual bounds of the allocation to determine
    // which blocks are available.
    size_t modHead = head & (fTotalSize - 1);
    size_t modTail = tail & (fTotalSize - 1);

    bool full = (head != tail && modHead == modTail);

    // We don't want large allocations to eat up this buffer, so we allocate them separately.
    if (full || size > fTotalSize/2) {
        return nil;
    }

    // case 1: free space lies at the beginning and/or the end of the buffer
    if (modHead >= modTail) {
        // check for room at the end
        if (fTotalSize - modHead < size) {
            // no room at the end, check the beginning
            if (modTail < size) {
                // no room at the beginning
                return nil;
            }
            // we are going to allocate from the beginning, adjust head to '0' position
            head += fTotalSize - modHead;
            modHead = 0;
        }
    // case 2: free space lies in the middle of the buffer, check for room there
    } else if (modTail - modHead < size) {
        // no room in the middle
        return nil;
    }

    *offset = modHead;
    // We're not sure what the usage of the next allocation will be --
    // to be safe we'll use 16 byte alignment.
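    // (Illustration, not original to this code: GrSizeAlignUp rounds up to the next multiple of
    // the given alignment, so e.g. head + size == 83 advances fHead to 96, while an
    // already-aligned value such as 80 is left unchanged.)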
    fHead = GrSizeAlignUp(head + size, 16);
    return fBuffer;
}

void GrMtlResourceProvider::BufferSuballocator::addCompletionHandler(
        GrMtlCommandBuffer* cmdBuffer) {
    this->ref();
    SkAutoSpinlock lock(fMutex);
    size_t newTail = fHead;
    cmdBuffer->addCompletedHandler(^(id <MTLCommandBuffer>commandBuffer) {
        // Make sure SkAutoSpinlock goes out of scope before
        // the BufferSuballocator is potentially deleted.
        {
            SkAutoSpinlock lock(fMutex);
            fTail = newTail;
        }
        this->unref();
    });
}

id<MTLBuffer> GrMtlResourceProvider::getDynamicBuffer(size_t size, size_t* offset) {
    id<MTLBuffer> buffer = fBufferSuballocator->getAllocation(size, offset);
    if (buffer) {
        return buffer;
    }

    // Try to grow allocation (old allocation will age out).
    // We grow up to a maximum size, and only grow if the requested allocation will
    // fit into half of the new buffer (to prevent very large transient buffers forcing
    // growth when they'll never fit anyway).
    if (fBufferSuballocator->size() < fBufferSuballocatorMaxSize &&
        size <= fBufferSuballocator->size()) {
        fBufferSuballocator.reset(new BufferSuballocator(fGpu->device(),
                                                         2*fBufferSuballocator->size()));
        id<MTLBuffer> buffer = fBufferSuballocator->getAllocation(size, offset);
        if (buffer) {
            return buffer;
        }
    }

    *offset = 0;
    return alloc_dynamic_buffer(fGpu->device(), size);
}

void GrMtlResourceProvider::addBufferCompletionHandler(GrMtlCommandBuffer* cmdBuffer) {
    fBufferSuballocator->addCompletionHandler(cmdBuffer);
}