/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/mtl/GrMtlResourceProvider.h"

#include "src/gpu/mtl/GrMtlCommandBuffer.h"
#include "src/gpu/mtl/GrMtlGpu.h"
#include "src/gpu/mtl/GrMtlPipelineState.h"
#include "src/gpu/mtl/GrMtlUtil.h"

#include "src/sksl/SkSLCompiler.h"

#if !__has_feature(objc_arc)
#error This file must be compiled with ARC. Use the -fobjc-arc flag.
#endif

GrMtlResourceProvider::GrMtlResourceProvider(GrMtlGpu* gpu)
    : fGpu(gpu) {
    fPipelineStateCache.reset(new PipelineStateCache(gpu));
    fBufferSuballocator.reset(new BufferSuballocator(gpu->device(), kBufferSuballocatorStartSize));
    // TODO: maxBufferLength seems like a reasonable metric to determine fBufferSuballocatorMaxSize,
    // but it may need tuning. We might also need a GrContextOption to let the client set this.
#ifdef SK_BUILD_FOR_MAC
    int64_t maxBufferLength = 1024*1024*1024;
#else
    int64_t maxBufferLength = 256*1024*1024;
#endif
#if GR_METAL_SDK_VERSION >= 200
    if ([gpu->device() respondsToSelector:@selector(maxBufferLength)]) {
        maxBufferLength = gpu->device().maxBufferLength;
    }
#endif
    fBufferSuballocatorMaxSize = maxBufferLength/16;
}

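// Forwards to the pipeline state cache, which returns a cached GrMtlPipelineState or builds
// and caches a new one.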
GrMtlPipelineState* GrMtlResourceProvider::findOrCreateCompatiblePipelineState(
        GrRenderTarget* renderTarget, GrSurfaceOrigin origin,
        const GrPipeline& pipeline, const GrPrimitiveProcessor& proc,
        const GrTextureProxy* const primProcProxies[], GrPrimitiveType primType) {
    return fPipelineStateCache->refPipelineState(renderTarget, origin, proc, primProcProxies,
                                                 pipeline, primType);
}

////////////////////////////////////////////////////////////////////////////////////////////////

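// Returns a cached depth-stencil state matching the given stencil settings and origin,
// creating and caching a new one on a miss.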
GrMtlDepthStencil* GrMtlResourceProvider::findOrCreateCompatibleDepthStencilState(
        const GrStencilSettings& stencil, GrSurfaceOrigin origin) {
    GrMtlDepthStencil* depthStencilState;
    GrMtlDepthStencil::Key key = GrMtlDepthStencil::GenerateKey(stencil, origin);
    depthStencilState = fDepthStencilStates.find(key);
    if (!depthStencilState) {
        depthStencilState = GrMtlDepthStencil::Create(fGpu, stencil, origin);
        fDepthStencilStates.add(depthStencilState);
    }
    SkASSERT(depthStencilState);
    return depthStencilState;
}

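// Returns a cached sampler matching the given sampler state and max mip level, creating and
// caching a new one on a miss.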
GrMtlSampler* GrMtlResourceProvider::findOrCreateCompatibleSampler(const GrSamplerState& params,
                                                                   uint32_t maxMipLevel) {
    GrMtlSampler* sampler;
    sampler = fSamplers.find(GrMtlSampler::GenerateKey(params, maxMipLevel));
    if (!sampler) {
        sampler = GrMtlSampler::Create(fGpu, params, maxMipLevel);
        fSamplers.add(sampler);
    }
    SkASSERT(sampler);
    return sampler;
}

void GrMtlResourceProvider::destroyResources() {
    // Iterate through all stored GrMtlSamplers and unref them before resetting the hash.
    SkTDynamicHash<GrMtlSampler, GrMtlSampler::Key>::Iter samplerIter(&fSamplers);
    for (; !samplerIter.done(); ++samplerIter) {
        (*samplerIter).unref();
    }
    fSamplers.reset();

    // Iterate through all stored GrMtlDepthStencils and unref them before resetting the hash.
    SkTDynamicHash<GrMtlDepthStencil, GrMtlDepthStencil::Key>::Iter dsIter(&fDepthStencilStates);
    for (; !dsIter.done(); ++dsIter) {
        (*dsIter).unref();
    }
    fDepthStencilStates.reset();

    fPipelineStateCache->release();
}

////////////////////////////////////////////////////////////////////////////////////////////////

#ifdef GR_PIPELINE_STATE_CACHE_STATS
// Display pipeline state cache usage
static const bool c_DisplayMtlPipelineCache{false};
#endif

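// A cache entry pairing a compiled pipeline state with the GrMtlGpu that created it.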
struct GrMtlResourceProvider::PipelineStateCache::Entry {
    Entry(GrMtlGpu* gpu, GrMtlPipelineState* pipelineState)
    : fGpu(gpu)
    , fPipelineState(pipelineState) {}

    GrMtlGpu* fGpu;
    std::unique_ptr<GrMtlPipelineState> fPipelineState;
};

GrMtlResourceProvider::PipelineStateCache::PipelineStateCache(GrMtlGpu* gpu)
    : fMap(kMaxEntries)
    , fGpu(gpu)
#ifdef GR_PIPELINE_STATE_CACHE_STATS
    , fTotalRequests(0)
    , fCacheMisses(0)
#endif
{}

GrMtlResourceProvider::PipelineStateCache::~PipelineStateCache() {
    SkASSERT(0 == fMap.count());
    // dump stats
#ifdef GR_PIPELINE_STATE_CACHE_STATS
    if (c_DisplayMtlPipelineCache) {
        SkDebugf("--- Pipeline State Cache ---\n");
        SkDebugf("Total requests: %d\n", fTotalRequests);
        SkDebugf("Cache misses: %d\n", fCacheMisses);
        SkDebugf("Cache miss %%: %f\n", (fTotalRequests > 0) ?
                 100.f * fCacheMisses / fTotalRequests :
                 0.f);
        SkDebugf("---------------------\n");
    }
#endif
}

void GrMtlResourceProvider::PipelineStateCache::release() {
    fMap.reset();
}

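// Builds a descriptor key from the draw state and returns the matching cached pipeline state,
// constructing and caching a new one on a miss.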
GrMtlPipelineState* GrMtlResourceProvider::PipelineStateCache::refPipelineState(
        GrRenderTarget* renderTarget,
        GrSurfaceOrigin origin,
        const GrPrimitiveProcessor& primProc,
        const GrTextureProxy* const primProcProxies[],
        const GrPipeline& pipeline,
        GrPrimitiveType primType) {
#ifdef GR_PIPELINE_STATE_CACHE_STATS
    ++fTotalRequests;
#endif
    // Get GrMtlProgramDesc
    GrMtlPipelineStateBuilder::Desc desc;
    if (!GrMtlPipelineStateBuilder::Desc::Build(&desc, renderTarget, primProc, pipeline, primType,
                                                fGpu)) {
        GrCapsDebugf(fGpu->caps(), "Failed to build mtl program descriptor!\n");
        return nullptr;
    }
    // If we knew the shader wouldn't depend on origin, we could skip this (and use the same
    // program for both origins). Instrumenting all fragment processors would be difficult and
    // error prone.
    desc.setSurfaceOriginKey(GrGLSLFragmentShaderBuilder::KeyForSurfaceOrigin(origin));

    std::unique_ptr<Entry>* entry = fMap.find(desc);
    if (!entry) {
#ifdef GR_PIPELINE_STATE_CACHE_STATS
        ++fCacheMisses;
#endif
        GrMtlPipelineState* pipelineState(GrMtlPipelineStateBuilder::CreatePipelineState(
                fGpu, renderTarget, origin, primProc, primProcProxies, pipeline, &desc));
        if (nullptr == pipelineState) {
            return nullptr;
        }
        entry = fMap.insert(desc, std::unique_ptr<Entry>(new Entry(fGpu, pipelineState)));
        return (*entry)->fPipelineState.get();
    }
    return (*entry)->fPipelineState.get();
}

////////////////////////////////////////////////////////////////////////////////////////////////

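// Allocates a CPU-accessible Metal buffer: managed storage on macOS, shared storage on other
// platforms.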
static id<MTLBuffer> alloc_dynamic_buffer(id<MTLDevice> device, size_t size) {
    return [device newBufferWithLength: size
#ifdef SK_BUILD_FOR_MAC
                               options: MTLResourceStorageModeManaged];
#else
                               options: MTLResourceStorageModeShared];
#endif
}

// The idea here is that we create a ring buffer which is used for all dynamic allocations
// below a certain size. When a dynamic GrMtlBuffer is mapped, it grabs a portion of this
// buffer and uses it. On a subsequent map it will grab a different portion of the buffer.
// This prevents the buffer from overwriting itself before it's submitted to the command
// stream.

GrMtlResourceProvider::BufferSuballocator::BufferSuballocator(id<MTLDevice> device, size_t size)
        : fBuffer(alloc_dynamic_buffer(device, size))
        , fTotalSize(size)
        , fHead(0)
        , fTail(0) {
    // We increment fHead and fTail without bound and let overflow handle any wrapping.
    // Because of this, size needs to be a power of two.
    SkASSERT(SkIsPow2(size));
}

id<MTLBuffer> GrMtlResourceProvider::BufferSuballocator::getAllocation(size_t size,
                                                                       size_t* offset) {
    // capture current state locally (because fTail could be overwritten by the completion handler)
    size_t head, tail;
    SkAutoSpinlock lock(fMutex);
    head = fHead;
    tail = fTail;

    // The head and tail indices increment without bound, wrapping with overflow,
    // so we need to mod them down to the actual bounds of the allocation to determine
    // which blocks are available.
    size_t modHead = head & (fTotalSize - 1);
    size_t modTail = tail & (fTotalSize - 1);

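    // For example, with fTotalSize == 16 a head index of 20 maps to slot 20 & 15 == 4.
    // If head and tail differ but map to the same slot, head has wrapped all the way around
    // to tail and there is no free space left.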
    bool full = (head != tail && modHead == modTail);

    // We don't want large allocations to eat up this buffer, so we allocate them separately.
    if (full || size > fTotalSize/2) {
        return nil;
    }

    // case 1: free space lies at the beginning and/or the end of the buffer
    if (modHead >= modTail) {
        // check for room at the end
        if (fTotalSize - modHead < size) {
            // no room at the end, check the beginning
            if (modTail < size) {
                // no room at the beginning
                return nil;
            }
            // we are going to allocate from the beginning, adjust head to '0' position
            head += fTotalSize - modHead;
            modHead = 0;
        }
    // case 2: free space lies in the middle of the buffer, check for room there
    } else if (modTail - modHead < size) {
        // no room in the middle
        return nil;
    }

    *offset = modHead;
    // We're not sure what the usage of the next allocation will be --
    // to be safe we'll use 16 byte alignment.
    fHead = GrSizeAlignUp(head + size, 16);
    return fBuffer;
}

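// Registers a completion handler on the given command buffer. Once the GPU has finished with
// that command buffer, every suballocation made up to the current fHead can be reused, so the
// handler advances fTail to that point. The ref()/unref() pair keeps this BufferSuballocator
// alive until the handler has run.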
void GrMtlResourceProvider::BufferSuballocator::addCompletionHandler(
        GrMtlCommandBuffer* cmdBuffer) {
    this->ref();
    SkAutoSpinlock lock(fMutex);
    size_t newTail = fHead;
    cmdBuffer->addCompletedHandler(^(id <MTLCommandBuffer>commandBuffer) {
        // Make sure SkAutoSpinlock goes out of scope before
        // the BufferSuballocator is potentially deleted.
        {
            SkAutoSpinlock lock(fMutex);
            fTail = newTail;
        }
        this->unref();
    });
}

id<MTLBuffer> GrMtlResourceProvider::getDynamicBuffer(size_t size, size_t* offset) {
    id<MTLBuffer> buffer = fBufferSuballocator->getAllocation(size, offset);
    if (buffer) {
        return buffer;
    }

    // Try to grow the allocation (the old allocation will age out).
    // We grow up to a maximum size, and only grow if the requested allocation will
    // fit into half of the new buffer (to prevent very large transient buffers forcing
    // growth when they'll never fit anyway).
    if (fBufferSuballocator->size() < fBufferSuballocatorMaxSize &&
        size <= fBufferSuballocator->size()) {
        fBufferSuballocator.reset(new BufferSuballocator(fGpu->device(),
                                                         2*fBufferSuballocator->size()));
        id<MTLBuffer> buffer = fBufferSuballocator->getAllocation(size, offset);
        if (buffer) {
            return buffer;
        }
    }

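    // The request can't be suballocated; fall back to a dedicated buffer for this allocation
    // alone.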
    *offset = 0;
    return alloc_dynamic_buffer(fGpu->device(), size);
}

void GrMtlResourceProvider::addBufferCompletionHandler(GrMtlCommandBuffer* cmdBuffer) {
    fBufferSuballocator->addCompletionHandler(cmdBuffer);
}