/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/hash_table.h"
#include "util/list.h"
#include "util/set.h"
#include "util/u_string.h"

#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "freedreno_fence.h"
#include "freedreno_query_hw.h"
#include "freedreno_resource.h"

static struct fd_ringbuffer *
alloc_ring(struct fd_batch *batch, unsigned sz, enum fd_ringbuffer_flags flags)
{
   struct fd_context *ctx = batch->ctx;

   /* If the kernel is too old to support an unlimited # of cmd buffers, we
    * have no option but to allocate large worst-case sizes so that
    * we don't need to grow the ringbuffer.  Performance is likely to
    * suffer, but there is no good alternative.
    *
    * Otherwise if supported, allocate a growable ring with initial
    * size of zero.
    */
   if ((fd_device_version(ctx->screen->dev) >= FD_VERSION_UNLIMITED_CMDS) &&
       !FD_DBG(NOGROW)) {
      flags |= FD_RINGBUFFER_GROWABLE;
      sz = 0;
   }

   return fd_submit_new_ringbuffer(batch->submit, sz, flags);
}

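/* Allocate a new subpass and append it to the batch's subpass list.  The
 * subpass's draw ring also becomes the batch's current draw ring.
 */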
static struct fd_batch_subpass *
subpass_create(struct fd_batch *batch)
{
   struct fd_batch_subpass *subpass = CALLOC_STRUCT(fd_batch_subpass);

   subpass->draw = alloc_ring(batch, 0x100000, 0);

   /* Replace batch->draw with a reference to the current subpass, for
    * backwards compat with code that is not subpass aware.
    */
   if (batch->draw)
      fd_ringbuffer_del(batch->draw);
   batch->draw = fd_ringbuffer_ref(subpass->draw);

   list_addtail(&subpass->node, &batch->subpasses);

   return subpass;
}

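/* Drop the subpass's ring/BO references, unlink it from the batch's
 * subpass list, and free it.
 */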
static void
subpass_destroy(struct fd_batch_subpass *subpass)
{
   fd_ringbuffer_del(subpass->draw);
   if (subpass->subpass_clears)
      fd_ringbuffer_del(subpass->subpass_clears);
   list_del(&subpass->node);
   if (subpass->lrz)
      fd_bo_del(subpass->lrz);
   free(subpass);
}

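/* Allocate and initialize a new batch: reference count, resource set,
 * submit and rings (gmem + initial subpass, plus a separate binning ring
 * on pre-a6xx), patch lists, and trace state.
 */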
struct fd_batch *
fd_batch_create(struct fd_context *ctx, bool nondraw)
{
   struct fd_batch *batch = CALLOC_STRUCT(fd_batch);

   if (!batch)
      return NULL;

   DBG("%p", batch);

   pipe_reference_init(&batch->reference, 1);
   batch->ctx = ctx;
   batch->nondraw = nondraw;

   batch->resources =
      _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);

   list_inithead(&batch->subpasses);

   batch->submit = fd_submit_new(ctx->pipe);
   if (batch->nondraw) {
      batch->gmem = alloc_ring(batch, 0x1000, FD_RINGBUFFER_PRIMARY);
   } else {
      batch->gmem = alloc_ring(batch, 0x100000, FD_RINGBUFFER_PRIMARY);

      /* a6xx+ re-uses draw rb for both draw and binning pass: */
      if (ctx->screen->gen < 6) {
         batch->binning = alloc_ring(batch, 0x100000, 0);
      }
   }

   /* Pre-attach private BOs: */
   for (unsigned i = 0; i < ctx->num_private_bos; i++)
      fd_ringbuffer_attach_bo(batch->gmem, ctx->private_bos[i]);

   batch->subpass = subpass_create(batch);

   batch->in_fence_fd = -1;
   batch->fence = NULL;

   /* Work around problems on earlier gens with submit merging, etc,
    * by always creating a fence to request that the submit is flushed
    * immediately:
    */
   if (ctx->screen->gen < 6)
      batch->fence = fd_pipe_fence_create(batch);

   fd_reset_wfi(batch);

   util_dynarray_init(&batch->draw_patches, NULL);
   util_dynarray_init(&batch->fb_read_patches, NULL);

   if (is_a2xx(ctx->screen)) {
      util_dynarray_init(&batch->shader_patches, NULL);
      util_dynarray_init(&batch->gmem_patches, NULL);
   }

   if (is_a3xx(ctx->screen))
      util_dynarray_init(&batch->rbrc_patches, NULL);

   util_dynarray_init(&batch->samples, NULL);

   u_trace_init(&batch->trace, &ctx->trace_context);
   batch->last_timestamp_cmd = NULL;

   return batch;
}

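/* Start a new subpass within the batch, carrying forward the previous
 * subpass's lrz buffer.  The new subpass becomes batch->subpass (and its
 * draw ring becomes batch->draw via subpass_create()).
 */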
struct fd_batch_subpass *
fd_batch_create_subpass(struct fd_batch *batch)
{
   assert(!batch->nondraw);

   struct fd_batch_subpass *subpass = subpass_create(batch);

   /* The new subpass inherits the current subpass's lrz state.  This is
    * replaced if there is a depth clear.
    */
   if (batch->subpass->lrz)
      subpass->lrz = fd_bo_ref(batch->subpass->lrz);

   batch->subpass = subpass;

   return subpass;
}

/**
 * Cleanup that we normally do when the submit is flushed, like dropping
 * rb references.  But also called when the batch is destroyed, just in
 * case it wasn't flushed.
 */
static void
cleanup_submit(struct fd_batch *batch)
{
   if (!batch->submit)
      return;

   foreach_subpass_safe (subpass, batch) {
      subpass_destroy(subpass);
   }

   fd_ringbuffer_del(batch->draw);
   fd_ringbuffer_del(batch->gmem);

   if (batch->binning) {
      fd_ringbuffer_del(batch->binning);
      batch->binning = NULL;
   }

   if (batch->prologue) {
      fd_ringbuffer_del(batch->prologue);
      batch->prologue = NULL;
   }

   if (batch->tile_epilogue) {
      fd_ringbuffer_del(batch->tile_epilogue);
      batch->tile_epilogue = NULL;
   }

   if (batch->epilogue) {
      fd_ringbuffer_del(batch->epilogue);
      batch->epilogue = NULL;
   }

   if (batch->tile_loads) {
      fd_ringbuffer_del(batch->tile_loads);
      batch->tile_loads = NULL;
   }

   if (batch->tile_store) {
      fd_ringbuffer_del(batch->tile_store);
      batch->tile_store = NULL;
   }

   fd_submit_del(batch->submit);
   batch->submit = NULL;
}

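/* Flush all batches that this batch depends on, so that their rendering
 * lands before ours, and clear the dependents mask.
 */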
static void
batch_flush_dependencies(struct fd_batch *batch) assert_dt
{
   struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
   struct fd_batch *dep;

   foreach_batch (dep, cache, batch->dependents_mask) {
      assert(dep->ctx == batch->ctx);
      fd_batch_flush(dep);
      fd_batch_reference(&dep, NULL);
   }

   batch->dependents_mask = 0;
}

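/* Drop this batch's references to its dependencies without flushing them. */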
static void
batch_reset_dependencies(struct fd_batch *batch)
{
   struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
   struct fd_batch *dep;

   foreach_batch (dep, cache, batch->dependents_mask) {
      fd_batch_reference(&dep, NULL);
   }

   batch->dependents_mask = 0;
}

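/* Detach all resources tracked by the batch, clearing this batch's bit in
 * each resource's batch_mask and dropping the write_batch reference where
 * this batch was the writer.  Called with the screen lock held.
 */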
static void
batch_reset_resources(struct fd_batch *batch)
{
   fd_screen_assert_locked(batch->ctx->screen);

   set_foreach (batch->resources, entry) {
      struct fd_resource *rsc = (struct fd_resource *)entry->key;
      _mesa_set_remove(batch->resources, entry);
      assert(rsc->track->batch_mask & (1 << batch->idx));
      rsc->track->batch_mask &= ~(1 << batch->idx);
      if (rsc->track->write_batch == batch)
         fd_batch_reference_locked(&rsc->track->write_batch, NULL);
   }
}

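/* Final destruction of the batch once the last reference is dropped.
 * Expects the screen lock to be held (it is temporarily dropped while
 * resetting dependencies), and tears down resource tracking, fences,
 * rings, and patch/sample arrays.
 */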
void
__fd_batch_destroy_locked(struct fd_batch *batch)
{
   struct fd_context *ctx = batch->ctx;

   DBG("%p", batch);

   fd_screen_assert_locked(batch->ctx->screen);

   fd_bc_invalidate_batch(batch, true);

   batch_reset_resources(batch);
   assert(batch->resources->entries == 0);
   _mesa_set_destroy(batch->resources, NULL);

   fd_screen_unlock(ctx->screen);
   batch_reset_dependencies(batch);
   assert(batch->dependents_mask == 0);

   util_copy_framebuffer_state(&batch->framebuffer, NULL);

   pipe_resource_reference(&batch->query_buf, NULL);

   if (batch->in_fence_fd != -1)
      close(batch->in_fence_fd);

   /* in case batch wasn't flushed but fence was created: */
   if (batch->fence)
      fd_pipe_fence_set_batch(batch->fence, NULL);

   fd_pipe_fence_ref(&batch->fence, NULL);

   cleanup_submit(batch);

   util_dynarray_fini(&batch->draw_patches);
   util_dynarray_fini(&batch->fb_read_patches);

   if (is_a2xx(batch->ctx->screen)) {
      util_dynarray_fini(&batch->shader_patches);
      util_dynarray_fini(&batch->gmem_patches);
   }

   if (is_a3xx(batch->ctx->screen))
      util_dynarray_fini(&batch->rbrc_patches);

   while (batch->samples.size > 0) {
      struct fd_hw_sample *samp =
         util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
      fd_hw_sample_reference(batch->ctx, &samp, NULL);
   }
   util_dynarray_fini(&batch->samples);

   u_trace_fini(&batch->trace);

   free(batch->key);
   free(batch);
   fd_screen_lock(ctx->screen);
}

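/* Unlocked variant: takes the screen lock around the locked destroy path. */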
void
__fd_batch_destroy(struct fd_batch *batch)
{
   struct fd_screen *screen = batch->ctx->screen;
   fd_screen_lock(screen);
   __fd_batch_destroy_locked(batch);
   fd_screen_unlock(screen);
}

void
__fd_batch_describe(char *buf, const struct fd_batch *batch)
{
   sprintf(buf, "fd_batch<%u>", batch->seqno);
}

/* Get the per-batch prologue ring, allocating it on first use: */
struct fd_ringbuffer *
fd_batch_get_prologue(struct fd_batch *batch)
{
   if (!batch->prologue)
      batch->prologue = alloc_ring(batch, 0x1000, 0);
   return batch->prologue;
}

/* Only called from fd_batch_flush() */
static void
batch_flush(struct fd_batch *batch) assert_dt
{
   DBG("%p: needs_flush=%d", batch, batch->needs_flush);

   if (batch->flushed)
      return;

   tc_assert_driver_thread(batch->ctx->tc);

   batch->needs_flush = false;

   /* close out the draw cmds by making sure any active queries are
    * paused:
    */
   fd_batch_finish_queries(batch);

   batch_flush_dependencies(batch);

   fd_screen_lock(batch->ctx->screen);
   batch_reset_resources(batch);
   /* NOTE: remove=false removes the batch from the hashtable, so future
    * lookups won't cache-hit a flushed batch, but leaves the weak reference
    * to the batch to avoid having multiple batches with same batch->idx, as
    * that causes all sorts of hilarity.
    */
   fd_bc_invalidate_batch(batch, false);
   batch->flushed = true;

   if (batch == batch->ctx->batch)
      fd_batch_reference_locked(&batch->ctx->batch, NULL);

   if (batch == batch->ctx->batch_nondraw)
      fd_batch_reference_locked(&batch->ctx->batch_nondraw, NULL);

   fd_screen_unlock(batch->ctx->screen);

   if (batch->fence)
      fd_pipe_fence_ref(&batch->ctx->last_fence, batch->fence);

   fd_gmem_render_tiles(batch);

   assert(batch->reference.count > 0);

   cleanup_submit(batch);
}

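/* Bind the framebuffer state to the batch, and pick up the zsbuf's current
 * lrz buffer for the current subpass if one exists.
 */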
void
fd_batch_set_fb(struct fd_batch *batch, const struct pipe_framebuffer_state *pfb)
{
   assert(!batch->nondraw);

   util_copy_framebuffer_state(&batch->framebuffer, pfb);

   if (!pfb->zsbuf)
      return;

   struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);

   /* Switching back to a batch we'd previously started constructing shouldn't
    * result in a different lrz.  The dependency tracking should avoid another
    * batch writing/clearing our depth buffer.
    */
   if (batch->subpass->lrz) {
      assert(batch->subpass->lrz == zsbuf->lrz);
   } else if (zsbuf->lrz) {
      batch->subpass->lrz = fd_bo_ref(zsbuf->lrz);
   }
}

/* NOTE: could drop the last ref to batch */
void
fd_batch_flush(struct fd_batch *batch)
{
   struct fd_batch *tmp = NULL;

   /* NOTE: we need to hold an extra ref across the body of flush,
    * since the last ref to this batch could be dropped when cleaning
    * up used_resources
    */
   fd_batch_reference(&tmp, batch);
   batch_flush(tmp);
   fd_batch_reference(&tmp, NULL);
}

/* Find a batch's dependents mask, including recursive dependencies: */
static uint32_t
recursive_dependents_mask(struct fd_batch *batch)
{
   struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
   struct fd_batch *dep;
   uint32_t dependents_mask = batch->dependents_mask;

   foreach_batch (dep, cache, batch->dependents_mask)
      dependents_mask |= recursive_dependents_mask(dep);

   return dependents_mask;
}

bool
fd_batch_has_dep(struct fd_batch *batch, struct fd_batch *dep)
{
   return !!(batch->dependents_mask & (1 << dep->idx));
}

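/* Record that 'batch' depends on 'dep', so that dep gets flushed before
 * batch.  Takes a reference to dep, which is dropped again when the
 * dependencies are flushed or reset.  Caller must hold the screen lock.
 */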
void
fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep)
{
   fd_screen_assert_locked(batch->ctx->screen);

   assert(batch->ctx == dep->ctx);

   if (fd_batch_has_dep(batch, dep))
      return;

   /* a loop should not be possible */
   assert(!((1 << batch->idx) & recursive_dependents_mask(dep)));

   struct fd_batch *other = NULL;
   fd_batch_reference_locked(&other, dep);
   batch->dependents_mask |= (1 << dep->idx);
   DBG("%p: added dependency on %p", batch, dep);
}

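/* Flush the batch that is currently writing rsc.  The screen lock is
 * dropped around the flush, since flushing takes it again internally.
 */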
static void
flush_write_batch(struct fd_resource *rsc) assert_dt
{
   struct fd_batch *b = NULL;
   fd_batch_reference_locked(&b, rsc->track->write_batch);

   fd_screen_unlock(b->ctx->screen);
   fd_batch_flush(b);
   fd_screen_lock(b->ctx->screen);

   fd_batch_reference_locked(&b, NULL);
}

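/* Add rsc to the batch's resource set (if not already present) and attach
 * its backing BO(s) to the draw ring.
 */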
static void
fd_batch_add_resource(struct fd_batch *batch, struct fd_resource *rsc)
{
   if (likely(fd_batch_references_resource(batch, rsc))) {
      assert(_mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc));
      return;
   }

   assert(!_mesa_set_search(batch->resources, rsc));

   _mesa_set_add_pre_hashed(batch->resources, rsc->hash, rsc);
   rsc->track->batch_mask |= (1 << batch->idx);

   fd_ringbuffer_attach_bo(batch->draw, rsc->bo);
   if (unlikely(rsc->b.b.next)) {
      struct fd_resource *n = fd_resource(rsc->b.b.next);
      fd_ringbuffer_attach_bo(batch->draw, n->bo);
   }
}

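/* Track a write of rsc by this batch: mark the resource valid, flush or
 * add dependencies on other batches that access it, and make this batch
 * the resource's write_batch.  Caller must hold the screen lock.
 */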
void
fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc)
{
   struct fd_resource_tracking *track = rsc->track;

   fd_screen_assert_locked(batch->ctx->screen);

   DBG("%p: write %p", batch, rsc);

   /* Must do this before the early out, so we unset a previous resource
    * invalidate (which may have left the write_batch state in place).
    */
   rsc->valid = true;

   if (track->write_batch == batch)
      return;

   if (rsc->stencil)
      fd_batch_resource_write(batch, rsc->stencil);

   /* note, invalidate write batch, to avoid further writes to rsc
    * resulting in a write-after-read hazard.
    */

   /* if we are pending read or write by any other batch, they need to
    * be ordered before the current batch:
    */
   if (unlikely(track->batch_mask & ~(1 << batch->idx))) {
      struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
      struct fd_batch *dep;

      if (track->write_batch) {
         /* Cross-context writes without flush/barrier are undefined.
          * Let's simply protect ourselves from crashing by avoiding cross-
          * ctx dependencies and let the app have the undefined behavior
          * it asked for:
          */
         if (track->write_batch->ctx != batch->ctx) {
            fd_ringbuffer_attach_bo(batch->draw, rsc->bo);
            return;
         }

         flush_write_batch(rsc);
      }

      foreach_batch (dep, cache, track->batch_mask) {
         struct fd_batch *b = NULL;
         if ((dep == batch) || (dep->ctx != batch->ctx))
            continue;
         /* note that batch_add_dep could flush and unref dep, so
          * we need to hold a reference to keep it live for the
          * fd_bc_invalidate_batch()
          */
         fd_batch_reference(&b, dep);
         fd_batch_add_dep(batch, b);
         fd_bc_invalidate_batch(b, false);
         fd_batch_reference_locked(&b, NULL);
      }
   }
   fd_batch_reference_locked(&track->write_batch, batch);

   fd_batch_add_resource(batch, rsc);

   fd_batch_write_prep(batch, rsc);
}

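/* Slow path for tracking a read of rsc by this batch: flush the writer
 * batch if there is one (within the same context), then add the resource
 * to the batch.  Caller must hold the screen lock.
 */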
void
fd_batch_resource_read_slowpath(struct fd_batch *batch, struct fd_resource *rsc)
{
   fd_screen_assert_locked(batch->ctx->screen);

   if (rsc->stencil)
      fd_batch_resource_read(batch, rsc->stencil);

   DBG("%p: read %p", batch, rsc);

   struct fd_resource_tracking *track = rsc->track;

   /* If reading a resource pending a write, go ahead and flush the
    * writer.  This avoids situations where we end up having to
    * flush the current batch in _resource_used()
    */
   if (unlikely(track->write_batch && track->write_batch != batch)) {
      if (track->write_batch->ctx != batch->ctx) {
         /* Reading results from another context without flush/barrier
          * is undefined.  Let's simply protect ourselves from crashing
          * by avoiding cross-ctx dependencies and let the app have the
          * undefined behavior it asked for:
          */
         fd_ringbuffer_attach_bo(batch->draw, rsc->bo);
         return;
      }

      flush_write_batch(rsc);
   }

   fd_batch_add_resource(batch, rsc);
}

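/* Flush the batch if it has grown too large, either in number of draws,
 * in prim/draw stream size, or in ringbuffer size.
 */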
void
fd_batch_check_size(struct fd_batch *batch)
{
   if (batch->num_draws > 100000) {
      fd_batch_flush(batch);
      return;
   }

   /* Place a reasonable upper bound on prim/draw stream buffer size: */
   const unsigned limit_bits = 8 * 8 * 1024 * 1024;
   if ((batch->prim_strm_bits > limit_bits) ||
       (batch->draw_strm_bits > limit_bits)) {
      fd_batch_flush(batch);
      return;
   }

   if (!fd_ringbuffer_check_size(batch->draw))
      fd_batch_flush(batch);
}

/* Emit a WAIT_FOR_IDLE only if needed, i.e. if there has not already
 * been one since the last draw:
 */
void
fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   if (batch->needs_wfi) {
      if (batch->ctx->screen->gen >= 5)
         OUT_WFI5(ring);
      else
         OUT_WFI(ring);
      batch->needs_wfi = false;
   }
}