/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/hash_table.h"
#include "util/list.h"
#include "util/set.h"
#include "util/u_string.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"

#include "freedreno_batch.h"
#include "freedreno_batch_cache.h"
#include "freedreno_context.h"
#include "freedreno_resource.h"

/* Overview:
 *
 * The batch cache provides lookup for mapping pipe_framebuffer_state
 * to a batch.
 *
 * It does this via a hashtable, with a key that roughly matches the
 * pipe_framebuffer_state, as described below.
 *
 * Batch Cache hashtable key:
 *
 * To serialize the key, and to avoid dealing with holding a reference to
 * pipe_surface's (which hold a reference to pipe_resource and complicate
 * the whole refcnting thing), the key is variable length and inlines the
 * pertinent details of the pipe_surface.
 *
 * Batch:
 *
 * Each batch needs to hold a reference to each resource it depends on (ie.
 * anything that needs a mem2gmem), and a weak reference to the resources it
 * renders to.  (If both src[n] and dst[n] are not NULL then they are the
 * same.)
 *
 * When a resource is destroyed, we need to remove entries in the batch
 * cache that reference the resource, to avoid dangling pointer issues.
 * So each resource holds a hashset of the batches which reference it in
 * their hashtable key.
 *
 * When a batch no longer holds weak references to any resources (ie. all
 * the surfaces it rendered to are destroyed), the batch can be destroyed.
 * This could happen in an app that renders and never uses the result.
 * The more common scenario, I think, is that some, but not all, of the
 * surfaces are destroyed before the batch is submitted.
 *
 * If, for example, a batch writes to zsbuf but that surface is destroyed
 * before the batch is submitted, we can skip gmem2mem (but still need to
 * alloc gmem space as before).  If the batch depended on previous contents
 * of that surface, it would be holding a reference, so the surface would
 * not have been destroyed.
 */
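
/* Minimal usage sketch of the cache from the draw side (assumes the current
 * framebuffer state is kept in ctx->framebuffer; the actual call sites live
 * elsewhere in the driver):
 *
 *    struct fd_batch *batch = fd_batch_from_fb(ctx, &ctx->framebuffer);
 *    // ... record rendering into the batch ...
 *    fd_bc_flush(ctx, false);   // eg. at glFlush()
 *
 * fd_batch_from_fb() builds a key from the framebuffer state and returns
 * the existing batch for that key (or allocates a new one), and
 * fd_bc_flush() flushes every batch belonging to the context.
 */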

struct fd_batch_key {
   uint32_t width;
   uint32_t height;
   uint16_t layers;
   uint16_t samples;
   uint16_t num_surfs;
   uint16_t ctx_seqno;
   struct {
      struct pipe_resource *texture;
      union pipe_surface_desc u;
      uint8_t pos, samples;
      uint16_t format;
   } surf[0];
};
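
/* The surf[] array is stored inline after the fixed header, so a key
 * describing, say, a zsbuf plus one cbuf occupies
 *
 *    sizeof(struct fd_batch_key) + 2 * sizeof(key->surf[0])
 *
 * bytes.  That is the sizing used by key_alloc() and fd_batch_key_clone()
 * below, and it is what lets fd_batch_key_hash()/fd_batch_key_equals()
 * operate on the key purely by its memory contents.
 */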

static struct fd_batch_key *
key_alloc(unsigned num_surfs)
{
   struct fd_batch_key *key = CALLOC_VARIANT_LENGTH_STRUCT(
      fd_batch_key, sizeof(key->surf[0]) * num_surfs);
   return key;
}

uint32_t
fd_batch_key_hash(const void *_key)
{
   const struct fd_batch_key *key = _key;
   uint32_t hash = 0;
   hash = XXH32(key, offsetof(struct fd_batch_key, surf[0]), hash);
   hash = XXH32(key->surf, sizeof(key->surf[0]) * key->num_surfs, hash);
   return hash;
}

bool
fd_batch_key_equals(const void *_a, const void *_b)
{
   const struct fd_batch_key *a = _a;
   const struct fd_batch_key *b = _b;
   return (memcmp(a, b, offsetof(struct fd_batch_key, surf[0])) == 0) &&
          (memcmp(a->surf, b->surf, sizeof(a->surf[0]) * a->num_surfs) == 0);
}

struct fd_batch_key *
fd_batch_key_clone(void *mem_ctx, const struct fd_batch_key *key)
{
   unsigned sz =
      sizeof(struct fd_batch_key) + (sizeof(key->surf[0]) * key->num_surfs);
   struct fd_batch_key *new_key = rzalloc_size(mem_ctx, sz);
   memcpy(new_key, key, sz);
   return new_key;
}

void
fd_bc_init(struct fd_batch_cache *cache)
{
   cache->ht =
      _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
}

void
fd_bc_fini(struct fd_batch_cache *cache)
{
   _mesa_hash_table_destroy(cache->ht, NULL);
}

/* Flushes all batches in the batch cache.  Used at glFlush() and similar
 * times.
 */
void
fd_bc_flush(struct fd_context *ctx, bool deferred) assert_dt
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;

   /* fd_batch_flush() (and fd_batch_add_dep() which calls it indirectly)
    * can cause batches to be unref'd and freed under our feet, so grab
    * a reference to all the batches we need up-front.
    */
   struct fd_batch *batches[ARRAY_SIZE(cache->batches)] = {0};
   struct fd_batch *batch;
   unsigned n = 0;

   fd_screen_lock(ctx->screen);

   foreach_batch (batch, cache, cache->batch_mask) {
      if (batch->ctx == ctx) {
         fd_batch_reference_locked(&batches[n++], batch);
      }
   }

   /* deferred flush doesn't actually flush, but it marks every other
    * batch associated with the context as dependent on the current
    * batch.  So when the current batch gets flushed, all other batches
    * that came before also get flushed.
    */
   if (deferred) {
      struct fd_batch *current_batch = fd_context_batch(ctx);

      for (unsigned i = 0; i < n; i++) {
         if (batches[i] && (batches[i]->ctx == ctx) &&
             (batches[i] != current_batch)) {
            fd_batch_add_dep(current_batch, batches[i]);
         }
      }

      fd_batch_reference_locked(&current_batch, NULL);

      fd_screen_unlock(ctx->screen);
   } else {
      fd_screen_unlock(ctx->screen);

      for (unsigned i = 0; i < n; i++) {
         fd_batch_flush(batches[i]);
      }
   }

   for (unsigned i = 0; i < n; i++) {
      fd_batch_reference(&batches[i], NULL);
   }
}

/**
 * Flushes the batch (if any) writing this resource.  Must not hold the screen
 * lock.
 */
void
fd_bc_flush_writer(struct fd_context *ctx, struct fd_resource *rsc) assert_dt
{
   fd_screen_lock(ctx->screen);
   struct fd_batch *write_batch = NULL;
   fd_batch_reference_locked(&write_batch, rsc->track->write_batch);
   fd_screen_unlock(ctx->screen);

   if (write_batch) {
      fd_batch_flush(write_batch);
      fd_batch_reference(&write_batch, NULL);
   }
}

/**
 * Flushes any batches reading this resource.  Must not hold the screen lock.
 */
void
fd_bc_flush_readers(struct fd_context *ctx, struct fd_resource *rsc) assert_dt
{
   struct fd_batch *batch, *batches[32] = {};
   uint32_t batch_count = 0;

   /* This is a bit awkward, probably a fd_batch_flush_locked()
    * would make things simpler.. but we need to hold the lock
    * to iterate the batches which reference this resource.  So
    * we must first grab references under a lock, then flush.
    */
   fd_screen_lock(ctx->screen);
   foreach_batch (batch, &ctx->screen->batch_cache, rsc->track->batch_mask)
      fd_batch_reference_locked(&batches[batch_count++], batch);
   fd_screen_unlock(ctx->screen);

   for (int i = 0; i < batch_count; i++) {
      fd_batch_flush(batches[i]);
      fd_batch_reference(&batches[i], NULL);
   }
}

void
fd_bc_dump(struct fd_context *ctx, const char *fmt, ...)
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;

   if (!FD_DBG(MSGS))
      return;

   fd_screen_lock(ctx->screen);

   va_list ap;
   va_start(ap, fmt);
   vprintf(fmt, ap);
   va_end(ap);

   for (int i = 0; i < ARRAY_SIZE(cache->batches); i++) {
      struct fd_batch *batch = cache->batches[i];
      if (batch) {
         printf(" %p<%u>%s\n", batch, batch->seqno,
                batch->needs_flush ? ", NEEDS FLUSH" : "");
      }
   }

   printf("----\n");

   fd_screen_unlock(ctx->screen);
}

/**
 * Note that when a batch is flushed, it needs to remain in the cache so
 * that fd_bc_invalidate_resource() can work.. otherwise we can have the
 * case where a rsc is destroyed while a batch still has a dangling
 * reference to it.
 *
 * Note that the cmdstream (or, after the SUBMIT ioctl, the kernel)
 * would have a reference to the underlying bo, so it is ok for the
 * rsc to be destroyed before the batch.
 */
void
fd_bc_invalidate_batch(struct fd_batch *batch, bool remove)
{
   if (!batch)
      return;

   struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
   struct fd_batch_key *key = batch->key;

   fd_screen_assert_locked(batch->ctx->screen);

   if (remove) {
      cache->batches[batch->idx] = NULL;
      cache->batch_mask &= ~(1 << batch->idx);
   }

   if (!key)
      return;

   DBG("%p: key=%p", batch, batch->key);
   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
      struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
      rsc->track->bc_batch_mask &= ~(1 << batch->idx);
   }

   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(cache->ht, batch->hash, key);
   _mesa_hash_table_remove(cache->ht, entry);
}

void
fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy)
{
   struct fd_screen *screen = fd_screen(rsc->b.b.screen);
   struct fd_batch *batch;

   fd_screen_lock(screen);

   if (destroy) {
      foreach_batch (batch, &screen->batch_cache, rsc->track->batch_mask) {
         struct set_entry *entry =
            _mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc);
         _mesa_set_remove(batch->resources, entry);
      }
      rsc->track->batch_mask = 0;

      fd_batch_reference_locked(&rsc->track->write_batch, NULL);
   }

   foreach_batch (batch, &screen->batch_cache, rsc->track->bc_batch_mask)
      fd_bc_invalidate_batch(batch, false);

   rsc->track->bc_batch_mask = 0;

   fd_screen_unlock(screen);
}

static struct fd_batch *
alloc_batch_locked(struct fd_batch_cache *cache, struct fd_context *ctx,
                   bool nondraw) assert_dt
{
   struct fd_batch *batch;
   uint32_t idx;

   fd_screen_assert_locked(ctx->screen);

   while ((idx = ffs(~cache->batch_mask)) == 0) {
#if 0
      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
         batch = cache->batches[i];
         debug_printf("%d: needs_flush=%d, depends:", batch->idx, batch->needs_flush);
         set_foreach (batch->dependencies, entry) {
            struct fd_batch *dep = (struct fd_batch *)entry->key;
            debug_printf(" %d", dep->idx);
         }
         debug_printf("\n");
      }
#endif
      /* TODO: is LRU the better policy?  Or perhaps the batch that
       * depends on the fewest other batches?
       */
      struct fd_batch *flush_batch = NULL;
      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
         if (!flush_batch || (cache->batches[i]->seqno < flush_batch->seqno))
            fd_batch_reference_locked(&flush_batch, cache->batches[i]);
      }

      /* we can drop the lock temporarily here: since we hold a ref,
       * flush_batch won't disappear under us.
       */
      fd_screen_unlock(ctx->screen);
      DBG("%p: too many batches! flush forced!", flush_batch);
      fd_batch_flush(flush_batch);
      fd_screen_lock(ctx->screen);

      /* While the resources get cleaned up automatically, the flush_batch
       * doesn't get removed from the dependencies of other batches, so
       * it won't be unref'd and will remain in the table.
       *
       * TODO maybe keep a bitmask of batches that depend on me, to make
       * this easier:
       */
      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
         struct fd_batch *other = cache->batches[i];
         if (!other)
            continue;
         if (other->dependents_mask & (1 << flush_batch->idx)) {
            other->dependents_mask &= ~(1 << flush_batch->idx);
            struct fd_batch *ref = flush_batch;
            fd_batch_reference_locked(&ref, NULL);
         }
      }

      fd_batch_reference_locked(&flush_batch, NULL);
   }

   idx--; /* bit zero returns 1 for ffs() */

   batch = fd_batch_create(ctx, nondraw);
   if (!batch)
      return NULL;

   batch->seqno = cache->cnt++;
   batch->idx = idx;
   cache->batch_mask |= (1 << idx);

   debug_assert(cache->batches[idx] == NULL);
   cache->batches[idx] = batch;

   return batch;
}

struct fd_batch *
fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw)
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
   struct fd_batch *batch;

   /* For normal draw batches, pctx->set_framebuffer_state() handles
    * this, but for nondraw batches, this is a nice central location
    * to handle them all.
    */
   if (nondraw)
      fd_context_switch_from(ctx);

   fd_screen_lock(ctx->screen);
   batch = alloc_batch_locked(cache, ctx, nondraw);
   fd_screen_unlock(ctx->screen);

   if (batch && nondraw)
      fd_context_switch_to(ctx, batch);

   return batch;
}

static struct fd_batch *
batch_from_key(struct fd_context *ctx, struct fd_batch_key *key) assert_dt
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
   struct fd_batch *batch = NULL;
   uint32_t hash = fd_batch_key_hash(key);
   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(cache->ht, hash, key);

   if (entry) {
      free(key);
      fd_batch_reference_locked(&batch, (struct fd_batch *)entry->data);
      assert(!batch->flushed);
      return batch;
   }

   batch = alloc_batch_locked(cache, ctx, false);
#ifdef DEBUG
   DBG("%p: hash=0x%08x, %ux%u, %u layers, %u samples", batch, hash, key->width,
       key->height, key->layers, key->samples);
   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
      DBG("%p: surf[%u]: %p (%s) (%u,%u / %u,%u,%u)", batch,
          key->surf[idx].pos, key->surf[idx].texture,
          util_format_name(key->surf[idx].format),
          key->surf[idx].u.buf.first_element, key->surf[idx].u.buf.last_element,
          key->surf[idx].u.tex.first_layer, key->surf[idx].u.tex.last_layer,
          key->surf[idx].u.tex.level);
   }
#endif
   if (!batch)
      return NULL;

   /* reset max_scissor, which will be adjusted on draws
    * according to the actual scissor.
    */
   batch->max_scissor.minx = ~0;
   batch->max_scissor.miny = ~0;
   batch->max_scissor.maxx = 0;
   batch->max_scissor.maxy = 0;

   _mesa_hash_table_insert_pre_hashed(cache->ht, hash, key, batch);
   batch->key = key;
   batch->hash = hash;

   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
      struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
      rsc->track->bc_batch_mask = (1 << batch->idx);
   }

   return batch;
}

static void
key_surf(struct fd_batch_key *key, unsigned idx, unsigned pos,
         struct pipe_surface *psurf)
{
   key->surf[idx].texture = psurf->texture;
   key->surf[idx].u = psurf->u;
   key->surf[idx].pos = pos;
   key->surf[idx].samples = MAX2(1, psurf->nr_samples);
   key->surf[idx].format = psurf->format;
}

struct fd_batch *
fd_batch_from_fb(struct fd_context *ctx,
                 const struct pipe_framebuffer_state *pfb)
{
   unsigned idx = 0, n = pfb->nr_cbufs + (pfb->zsbuf ? 1 : 0);
   struct fd_batch_key *key = key_alloc(n);

   key->width = pfb->width;
   key->height = pfb->height;
   key->layers = pfb->layers;
   key->samples = util_framebuffer_get_num_samples(pfb);
   key->ctx_seqno = ctx->seqno;

   if (pfb->zsbuf)
      key_surf(key, idx++, 0, pfb->zsbuf);

   for (unsigned i = 0; i < pfb->nr_cbufs; i++)
      if (pfb->cbufs[i])
         key_surf(key, idx++, i + 1, pfb->cbufs[i]);

   key->num_surfs = idx;

   fd_screen_lock(ctx->screen);
   struct fd_batch *batch = batch_from_key(ctx, key);
   fd_screen_unlock(ctx->screen);

   return batch;
}