/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/hash_table.h"
#include "util/list.h"
#include "util/set.h"
#include "util/u_string.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"

#include "freedreno_batch.h"
#include "freedreno_batch_cache.h"
#include "freedreno_context.h"
#include "freedreno_resource.h"

/* Overview:
 *
 *   The batch cache provides a lookup mapping a pipe_framebuffer_state
 *   to a batch.
 *
 *   It does this via a hashtable, whose key roughly matches the
 *   pipe_framebuffer_state, as described below.
 *
 * Batch Cache hashtable key:
 *
 *   To serialize the key, and to avoid dealing with holding a reference to
 *   pipe_surfaces (which hold a reference to pipe_resource and complicate
 *   the whole refcounting scheme), the key is variable length and inlines
 *   the pertinent details of each pipe_surface.
 *
 * Batch:
 *
 *   Each batch needs to hold a reference to each resource it depends on (ie.
 *   anything that needs a mem2gmem), and a weak reference to the resources it
 *   renders to.  (If both src[n] and dst[n] are not NULL then they are the
 *   same.)
 *
 *   When a resource is destroyed, we need to remove entries in the batch
 *   cache that reference the resource, to avoid dangling pointer issues.
 *   So each resource holds a hashset of the batches which reference it in
 *   their hashtable key.
 *
 *   When a batch holds weak references to no more resources (ie. all the
 *   surfaces it rendered to have been destroyed) the batch can be destroyed.
 *   This could happen in an app that renders and never uses the result.  The
 *   more common scenario, though, is that some, but not all, of the surfaces
 *   are destroyed before the batch is submitted.
 *
 *   If, for example, a batch writes to the zsbuf but that surface is destroyed
 *   before the batch is submitted, we can skip gmem2mem (but still need to
 *   alloc gmem space as before).  If the batch had depended on the previous
 *   contents of that surface, it would be holding a reference, so the surface
 *   would not have been destroyed.
 */

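/* For example, a framebuffer with one color buffer plus a zsbuf yields a key
 * with num_surfs == 2: surf[0] describes the zsbuf (pos == 0) and surf[1]
 * describes cbuf 0 (pos == 1), following the ordering used by
 * fd_batch_from_fb() below.
 */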
struct fd_batch_key {
   uint32_t width;
   uint32_t height;
   uint16_t layers;
   uint16_t samples;
   uint16_t num_surfs;
   uint16_t ctx_seqno;
   struct {
      struct pipe_resource *texture;
      union pipe_surface_desc u;
      uint8_t pos, samples;
      uint16_t format;
   } surf[0];
};

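/* Allocate a key with space for 'num_surfs' inlined surface entries appended
 * to the fixed-size portion of the struct.
 */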
static struct fd_batch_key *
key_alloc(unsigned num_surfs)
{
   struct fd_batch_key *key = CALLOC_VARIANT_LENGTH_STRUCT(
      fd_batch_key, sizeof(key->surf[0]) * num_surfs);
   return key;
}

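/* Hash and compare cover the fixed-size portion of the key (everything up to
 * surf[0]) plus the variable-length surf[] array, so two keys only match if
 * they reference the same surfaces in the same order.
 */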
uint32_t
fd_batch_key_hash(const void *_key)
{
   const struct fd_batch_key *key = _key;
   uint32_t hash = 0;
   hash = XXH32(key, offsetof(struct fd_batch_key, surf[0]), hash);
   hash = XXH32(key->surf, sizeof(key->surf[0]) * key->num_surfs, hash);
   return hash;
}

bool
fd_batch_key_equals(const void *_a, const void *_b)
{
   const struct fd_batch_key *a = _a;
   const struct fd_batch_key *b = _b;
   return (memcmp(a, b, offsetof(struct fd_batch_key, surf[0])) == 0) &&
          (memcmp(a->surf, b->surf, sizeof(a->surf[0]) * a->num_surfs) == 0);
}

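/* Duplicate a key into storage ralloc'd against 'mem_ctx'. */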
struct fd_batch_key *
fd_batch_key_clone(void *mem_ctx, const struct fd_batch_key *key)
{
   unsigned sz =
      sizeof(struct fd_batch_key) + (sizeof(key->surf[0]) * key->num_surfs);
   struct fd_batch_key *new_key = rzalloc_size(mem_ctx, sz);
   memcpy(new_key, key, sz);
   return new_key;
}

void
fd_bc_init(struct fd_batch_cache *cache)
{
   cache->ht =
      _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
}

void
fd_bc_fini(struct fd_batch_cache *cache)
{
   _mesa_hash_table_destroy(cache->ht, NULL);
}

/* Flushes all batches in the batch cache.  Used at glFlush() and similar times. */
void
fd_bc_flush(struct fd_context *ctx, bool deferred) assert_dt
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;

   /* fd_batch_flush() (and fd_batch_add_dep() which calls it indirectly)
    * can cause batches to be unref'd and freed under our feet, so grab
    * a reference to all the batches we need up-front.
    */
   struct fd_batch *batches[ARRAY_SIZE(cache->batches)] = {0};
   struct fd_batch *batch;
   unsigned n = 0;

   fd_screen_lock(ctx->screen);

   foreach_batch (batch, cache, cache->batch_mask) {
      if (batch->ctx == ctx) {
         fd_batch_reference_locked(&batches[n++], batch);
      }
   }

   /* A deferred flush doesn't actually flush, but it marks every other
    * batch associated with the context as dependent on the current
    * batch.  So when the current batch gets flushed, all other batches
    * that came before also get flushed.
    */
   if (deferred) {
      struct fd_batch *current_batch = fd_context_batch(ctx);
      struct fd_batch *deps[ARRAY_SIZE(cache->batches)] = {0};
      unsigned ndeps = 0;

      /* To avoid a dependency loop, pull out any batches that already
       * have a dependency on the current batch.  This ensures that, when
       * the following loop adds the remaining batches as dependencies of
       * current_batch, none of them has a direct or indirect dependency
       * on current_batch.
       *
       * The batches that have a dependency on the current batch will
       * be flushed immediately (after dropping the screen lock) instead.
       */
      for (unsigned i = 0; i < n; i++) {
         if ((batches[i] != current_batch) &&
             fd_batch_has_dep(batches[i], current_batch)) {
            /* We can't immediately flush while we hold the screen lock,
             * but that doesn't matter.  We just want to skip adding any
             * deps that would result in a loop, we can flush after we've
             * updated the dependency graph and dropped the lock.
             */
            fd_batch_reference_locked(&deps[ndeps++], batches[i]);
            fd_batch_reference_locked(&batches[i], NULL);
         }
      }

      for (unsigned i = 0; i < n; i++) {
         if (batches[i] && (batches[i] != current_batch) &&
               (batches[i]->ctx == current_batch->ctx)) {
            fd_batch_add_dep(current_batch, batches[i]);
         }
      }

      fd_batch_reference_locked(&current_batch, NULL);

      fd_screen_unlock(ctx->screen);

      /* If there were any batches that we couldn't add a dependency on
       * (unlikely), flush them immediately.
       */
      for (unsigned i = 0; i < ndeps; i++) {
         fd_batch_flush(deps[i]);
         fd_batch_reference(&deps[i], NULL);
      }
   } else {
      fd_screen_unlock(ctx->screen);

      for (unsigned i = 0; i < n; i++) {
         fd_batch_flush(batches[i]);
      }
   }

   for (unsigned i = 0; i < n; i++) {
      fd_batch_reference(&batches[i], NULL);
   }
}

/**
 * Flushes the batch (if any) writing this resource.  Must not hold the screen
 * lock.
 */
void
fd_bc_flush_writer(struct fd_context *ctx, struct fd_resource *rsc) assert_dt
{
   fd_screen_lock(ctx->screen);
   struct fd_batch *write_batch = NULL;
   fd_batch_reference_locked(&write_batch, rsc->track->write_batch);
   fd_screen_unlock(ctx->screen);

   if (write_batch) {
      if (write_batch->ctx == ctx)
         fd_batch_flush(write_batch);
      fd_batch_reference(&write_batch, NULL);
   }
}

/**
 * Flushes any batches reading this resource.  Must not hold the screen lock.
 */
void
fd_bc_flush_readers(struct fd_context *ctx, struct fd_resource *rsc) assert_dt
{
   struct fd_batch *batch, *batches[32] = {};
   uint32_t batch_count = 0;

   /* This is a bit awkward, probably a fd_batch_flush_locked()
    * would make things simpler.. but we need to hold the lock
    * to iterate the batches which reference this resource.  So
    * we must first grab references under a lock, then flush.
    */
   fd_screen_lock(ctx->screen);
   foreach_batch (batch, &ctx->screen->batch_cache, rsc->track->batch_mask)
      fd_batch_reference_locked(&batches[batch_count++], batch);
   fd_screen_unlock(ctx->screen);

   for (int i = 0; i < batch_count; i++) {
      if (batches[i]->ctx == ctx)
         fd_batch_flush(batches[i]);
      fd_batch_reference(&batches[i], NULL);
   }
}

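/* Debug helper: dump the batches currently in the cache (only when
 * FD_DBG(MSGS) is enabled).
 */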
void
fd_bc_dump(struct fd_context *ctx, const char *fmt, ...)
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;

   if (!FD_DBG(MSGS))
      return;

   fd_screen_lock(ctx->screen);

   va_list ap;
   va_start(ap, fmt);
   vprintf(fmt, ap);
   va_end(ap);

   for (int i = 0; i < ARRAY_SIZE(cache->batches); i++) {
      struct fd_batch *batch = cache->batches[i];
      if (batch) {
         printf("  %p<%u>%s\n", batch, batch->seqno,
                batch->needs_flush ? ", NEEDS FLUSH" : "");
      }
   }

   printf("----\n");

   fd_screen_unlock(ctx->screen);
}

/**
 * Note that when a batch is flushed, it needs to remain in the cache so
 * that fd_bc_invalidate_resource() can work.  Otherwise we can have the
 * case where an rsc is destroyed while a batch still has a dangling
 * reference to it.
 *
 * Note that the cmdstream (or, after the SUBMIT ioctl, the kernel)
 * would have a reference to the underlying bo, so it is ok for the
 * rsc to be destroyed before the batch.
 */
void
fd_bc_invalidate_batch(struct fd_batch *batch, bool remove)
{
   if (!batch)
      return;

   struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
   struct fd_batch_key *key = batch->key;

   fd_screen_assert_locked(batch->ctx->screen);

   if (remove) {
      cache->batches[batch->idx] = NULL;
      cache->batch_mask &= ~(1 << batch->idx);
   }

   if (!key)
      return;

   DBG("%p: key=%p", batch, batch->key);
   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
      struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
      rsc->track->bc_batch_mask &= ~(1 << batch->idx);
   }

   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(cache->ht, batch->hash, key);
   _mesa_hash_table_remove(cache->ht, entry);
}

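/* Remove any batch cache entries whose key references this resource.  If
 * 'destroy' is set (the resource is going away), also drop the resource
 * from the tracking of any batches that currently read or write it.
 */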
void
fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy)
{
   struct fd_screen *screen = fd_screen(rsc->b.b.screen);
   struct fd_batch *batch;

   fd_screen_lock(screen);

   if (destroy) {
      foreach_batch (batch, &screen->batch_cache, rsc->track->batch_mask) {
         struct set_entry *entry = _mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc);
         _mesa_set_remove(batch->resources, entry);
      }
      rsc->track->batch_mask = 0;

      fd_batch_reference_locked(&rsc->track->write_batch, NULL);
   }

   foreach_batch (batch, &screen->batch_cache, rsc->track->bc_batch_mask)
      fd_bc_invalidate_batch(batch, false);

   rsc->track->bc_batch_mask = 0;

   fd_screen_unlock(screen);
}

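/* Grab a free slot in the batch cache for a new batch.  If every slot is in
 * use, force-flush the oldest batch (lowest seqno) to free one up.
 */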
static struct fd_batch *
alloc_batch_locked(struct fd_batch_cache *cache, struct fd_context *ctx,
                   bool nondraw) assert_dt
{
   struct fd_batch *batch;
   uint32_t idx;

   fd_screen_assert_locked(ctx->screen);

   while ((idx = ffs(~cache->batch_mask)) == 0) {
#if 0
      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
         batch = cache->batches[i];
         debug_printf("%d: needs_flush=%d, depends:", batch->idx, batch->needs_flush);
         set_foreach (batch->dependencies, entry) {
            struct fd_batch *dep = (struct fd_batch *)entry->key;
            debug_printf(" %d", dep->idx);
         }
         debug_printf("\n");
      }
#endif
      /* TODO: is LRU the better policy?  Or perhaps the batch that
       * depends on the fewest other batches?
       */
      struct fd_batch *flush_batch = NULL;
      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
         if (!flush_batch || (cache->batches[i]->seqno < flush_batch->seqno))
            fd_batch_reference_locked(&flush_batch, cache->batches[i]);
      }

      /* we can drop the lock temporarily here; since we hold a ref,
       * flush_batch won't disappear under us.
       */
      fd_screen_unlock(ctx->screen);
      DBG("%p: too many batches!  flush forced!", flush_batch);
      fd_batch_flush(flush_batch);
      fd_screen_lock(ctx->screen);

      /* While the resources get cleaned up automatically, the flush_batch
       * doesn't get removed from the dependencies of other batches, so
       * it won't be unref'd and will remain in the table.
       *
       * TODO maybe keep a bitmask of batches that depend on me, to make
       * this easier:
       */
      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
         struct fd_batch *other = cache->batches[i];
         if (!other)
            continue;
         if (fd_batch_has_dep(other, flush_batch)) {
            other->dependents_mask &= ~(1 << flush_batch->idx);
            struct fd_batch *ref = flush_batch;
            fd_batch_reference_locked(&ref, NULL);
         }
      }

      fd_batch_reference_locked(&flush_batch, NULL);
   }

   idx--; /* bit zero returns 1 for ffs() */

   batch = fd_batch_create(ctx, nondraw);
   if (!batch)
      return NULL;

   batch->seqno = seqno_next(&cache->cnt);
   batch->idx = idx;
   cache->batch_mask |= (1 << idx);

   assert(cache->batches[idx] == NULL);
   cache->batches[idx] = batch;

   return batch;
}

static void
alloc_query_buf(struct fd_context *ctx, struct fd_batch *batch)
{
   if (batch->query_buf)
      return;

   if ((ctx->screen->gen < 3) || (ctx->screen->gen > 4))
      return;

   /* For gens that use fd_hw_query, pre-allocate an initially zero-sized
    * (unbacked) query buffer.  This simplifies draw/grid/etc-time resource
    * tracking.
    */
   struct pipe_screen *pscreen = &ctx->screen->base;
   struct pipe_resource templ = {
      .target = PIPE_BUFFER,
      .format = PIPE_FORMAT_R8_UNORM,
      .bind = PIPE_BIND_QUERY_BUFFER,
      .width0 = 0, /* create initially zero size buffer */
      .height0 = 1,
      .depth0 = 1,
      .array_size = 1,
      .last_level = 0,
      .nr_samples = 1,
   };
   batch->query_buf = pscreen->resource_create(pscreen, &templ);
}

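/* Allocate a batch that is not registered in the batch cache hashtable (it
 * has no framebuffer key), e.g. for nondraw batches.
 */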
struct fd_batch *
fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw)
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
   struct fd_batch *batch;

   /* For normal draw batches, pctx->set_framebuffer_state() handles
    * this, but for nondraw batches, this is a nice central location
    * to handle them all.
    */
   if (nondraw)
      fd_context_switch_from(ctx);

   fd_screen_lock(ctx->screen);
   batch = alloc_batch_locked(cache, ctx, nondraw);
   fd_screen_unlock(ctx->screen);

   alloc_query_buf(ctx, batch);

   if (batch && nondraw)
      fd_context_switch_to(ctx, batch);

   return batch;
}

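/* Look up the batch for 'key' in the cache, or allocate a new one and insert
 * it.  Takes ownership of 'key': it is freed if an existing batch is found,
 * otherwise it is stored in the new batch.  Called with the screen lock held.
 */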
static struct fd_batch *
batch_from_key(struct fd_context *ctx, struct fd_batch_key *key) assert_dt
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
   struct fd_batch *batch = NULL;
   uint32_t hash = fd_batch_key_hash(key);
   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(cache->ht, hash, key);

   if (entry) {
      free(key);
      fd_batch_reference_locked(&batch, (struct fd_batch *)entry->data);
      assert(!batch->flushed);
      return batch;
   }

   batch = alloc_batch_locked(cache, ctx, false);
#ifdef DEBUG
   DBG("%p: hash=0x%08x, %ux%u, %u layers, %u samples", batch, hash, key->width,
       key->height, key->layers, key->samples);
   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
      DBG("%p:  surf[%u]: %p (%s) (%u,%u / %u,%u,%u)", batch,
          key->surf[idx].pos, key->surf[idx].texture,
          util_format_name(key->surf[idx].format),
          key->surf[idx].u.buf.first_element, key->surf[idx].u.buf.last_element,
          key->surf[idx].u.tex.first_layer, key->surf[idx].u.tex.last_layer,
          key->surf[idx].u.tex.level);
   }
#endif
   if (!batch)
      return NULL;

   /* reset max_scissor, which will be adjusted on draws
    * according to the actual scissor.
    */
   batch->max_scissor.minx = ~0;
   batch->max_scissor.miny = ~0;
   batch->max_scissor.maxx = 0;
   batch->max_scissor.maxy = 0;

   _mesa_hash_table_insert_pre_hashed(cache->ht, hash, key, batch);
   batch->key = key;
   batch->hash = hash;

   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
      struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
      rsc->track->bc_batch_mask = (1 << batch->idx);
   }

   return batch;
}

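/* Fill in one inlined surface entry of the key from a pipe_surface.  'pos'
 * records the attachment slot (0 for the zsbuf, 1+n for cbuf n).
 */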
static void
key_surf(struct fd_batch_key *key, unsigned idx, unsigned pos,
         struct pipe_surface *psurf)
{
   key->surf[idx].texture = psurf->texture;
   key->surf[idx].u = psurf->u;
   key->surf[idx].pos = pos;
   key->surf[idx].samples = MAX2(1, psurf->nr_samples);
   key->surf[idx].format = psurf->format;
}

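/* Return the batch used to render to the given framebuffer state, creating a
 * new one (and its cache entry) if none exists yet.
 */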
struct fd_batch *
fd_batch_from_fb(struct fd_context *ctx,
                 const struct pipe_framebuffer_state *pfb)
{
   unsigned idx = 0, n = pfb->nr_cbufs + (pfb->zsbuf ? 1 : 0);
   struct fd_batch_key *key = key_alloc(n);

   key->width = pfb->width;
   key->height = pfb->height;
   key->layers = pfb->layers;
   key->samples = util_framebuffer_get_num_samples(pfb);
   key->ctx_seqno = ctx->seqno;

   if (pfb->zsbuf)
      key_surf(key, idx++, 0, pfb->zsbuf);

   for (unsigned i = 0; i < pfb->nr_cbufs; i++)
      if (pfb->cbufs[i])
         key_surf(key, idx++, i + 1, pfb->cbufs[i]);

   key->num_surfs = idx;

   fd_screen_lock(ctx->screen);
   struct fd_batch *batch = batch_from_key(ctx, key);
   fd_screen_unlock(ctx->screen);

   alloc_query_buf(ctx, batch);

   fd_batch_set_fb(batch, pfb);

   return batch;
}