/*
 * Copyright 2022 Alyssa Rosenzweig
 * Copyright 2019-2020 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include <xf86drm.h>
#include "asahi/lib/decode.h"
#include "util/bitset.h"
#include "util/u_dynarray.h"
#include "util/u_range.h"
#include "agx_state.h"
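
/* Iterate the indices of every batch slot currently in the given state. */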
#define foreach_active(ctx, idx) \
   BITSET_FOREACH_SET(idx, ctx->batches.active, AGX_MAX_BATCHES)

#define foreach_submitted(ctx, idx) \
   BITSET_FOREACH_SET(idx, ctx->batches.submitted, AGX_MAX_BATCHES)

#define batch_debug(batch, fmt, ...) \
   do { \
      if (unlikely(agx_device(batch->ctx->base.screen)->debug & \
                   AGX_DBG_BATCH)) \
         agx_msg("[Batch %u] " fmt "\n", agx_batch_idx(batch), ##__VA_ARGS__); \
   } while (0)

bool
agx_batch_is_active(struct agx_batch *batch)
{
   return BITSET_TEST(batch->ctx->batches.active, agx_batch_idx(batch));
}

bool
agx_batch_is_submitted(struct agx_batch *batch)
{
   return BITSET_TEST(batch->ctx->batches.submitted, agx_batch_idx(batch));
}
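
/*
 * A batch moves through a simple lifecycle tracked by two bitsets: it is
 * first marked active (being recorded), then submitted (handed off to the
 * kernel), and finally complete (neither bit set) once it has been cleaned
 * up and its slot may be reused.
 */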
static void
agx_batch_mark_active(struct agx_batch *batch)
{
   unsigned batch_idx = agx_batch_idx(batch);

   batch_debug(batch, "ACTIVE");

   assert(!BITSET_TEST(batch->ctx->batches.submitted, batch_idx));
   assert(!BITSET_TEST(batch->ctx->batches.active, batch_idx));
   BITSET_SET(batch->ctx->batches.active, batch_idx);
}

static void
agx_batch_mark_submitted(struct agx_batch *batch)
{
   unsigned batch_idx = agx_batch_idx(batch);

   batch_debug(batch, "SUBMIT");

   assert(BITSET_TEST(batch->ctx->batches.active, batch_idx));
   assert(!BITSET_TEST(batch->ctx->batches.submitted, batch_idx));
   BITSET_CLEAR(batch->ctx->batches.active, batch_idx);
   BITSET_SET(batch->ctx->batches.submitted, batch_idx);
}

static void
agx_batch_mark_complete(struct agx_batch *batch)
{
   unsigned batch_idx = agx_batch_idx(batch);

   batch_debug(batch, "COMPLETE");

   assert(!BITSET_TEST(batch->ctx->batches.active, batch_idx));
   assert(BITSET_TEST(batch->ctx->batches.submitted, batch_idx));
   BITSET_CLEAR(batch->ctx->batches.submitted, batch_idx);
}
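
/* Allocate a BO to hold the encoded control stream commands for a batch. */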
struct agx_encoder
agx_encoder_allocate(struct agx_batch *batch, struct agx_device *dev)
{
   struct agx_bo *bo = agx_bo_create(dev, 0x80000, 0, "Encoder");

   return (struct agx_encoder){
      .bo = bo,
      .current = bo->ptr.cpu,
      .end = (uint8_t *)bo->ptr.cpu + bo->size,
   };
}
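
/*
 * (Re)initialize a batch slot for recording against the given framebuffer
 * key: reset CPU-side state, set up pools and the appropriate encoder (CDM
 * for compute, VDM for render), and mark the slot active.
 */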
static void
agx_batch_init(struct agx_context *ctx,
               const struct pipe_framebuffer_state *key,
               struct agx_batch *batch)
{
   struct agx_device *dev = agx_device(ctx->base.screen);

   batch->ctx = ctx;
   util_copy_framebuffer_state(&batch->key, key);
   batch->seqnum = ++ctx->batches.seqnum;

   agx_pool_init(&batch->pool, dev, 0, true);
   agx_pool_init(&batch->pipeline_pool, dev, AGX_BO_LOW_VA, true);

   /* This allocation happens only once; on reuse, the memory is just zeroed
    * (not freed) during batch cleanup. The memory is owned by the context.
    */
   if (!batch->bo_list.set) {
      batch->bo_list.set = rzalloc_array(ctx, BITSET_WORD, 128);
      batch->bo_list.bit_count = 128 * sizeof(BITSET_WORD) * 8;
   } else {
      memset(batch->bo_list.set, 0, batch->bo_list.bit_count / 8);
   }

   if (agx_batch_is_compute(batch)) {
      batch->cdm = agx_encoder_allocate(batch, dev);
      memset(&batch->vdm, 0, sizeof(batch->vdm));
   } else {
      batch->vdm = agx_encoder_allocate(batch, dev);
      memset(&batch->cdm, 0, sizeof(batch->cdm));
   }

   util_dynarray_init(&batch->scissor, ctx);
   util_dynarray_init(&batch->depth_bias, ctx);
   util_dynarray_init(&batch->timestamps, ctx);

   batch->clear = 0;
   batch->draw = 0;
   batch->load = 0;
   batch->resolve = 0;
   memset(batch->uploaded_clear_color, 0, sizeof(batch->uploaded_clear_color));
   batch->clear_depth = 0;
   batch->clear_stencil = 0;
   batch->varyings = 0;
   batch->geometry_state = 0;
   batch->initialized = false;
   batch->draws = 0;
   batch->incoherent_writes = false;
   agx_bo_unreference(batch->sampler_heap.bo);
   batch->sampler_heap.bo = NULL;
   batch->sampler_heap.count = 0;
   batch->vs_scratch = false;
   batch->fs_scratch = false;
   batch->cs_scratch = false;
   batch->vs_preamble_scratch = 0;
   batch->fs_preamble_scratch = 0;
   batch->cs_preamble_scratch = 0;

   /* We need to emit prim state at the start. Max collides with all. */
   batch->reduced_prim = MESA_PRIM_COUNT;

   if (!batch->syncobj) {
      int ret = drmSyncobjCreate(dev->fd, 0, &batch->syncobj);
      assert(!ret && batch->syncobj);
   }

   agx_batch_mark_active(batch);
}

static void
agx_batch_print_stats(struct agx_device *dev, struct agx_batch *batch)
{
   unreachable("Linux UAPI not yet upstream");
}
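
/*
 * Release everything a batch holds once its work is done (or when resetting
 * an empty batch): finish its queries, drop BO references and writer
 * tracking, tear down its pools and dynamic arrays, and mark the slot
 * complete so it can be reused.
 */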
static void
agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch, bool reset)
{
   struct agx_device *dev = agx_device(ctx->base.screen);
   assert(batch->ctx == ctx);
   assert(agx_batch_is_submitted(batch));

   assert(ctx->batch != batch);

   uint64_t begin_ts = ~0, end_ts = 0;
   /* TODO: UAPI pending */
   agx_finish_batch_queries(batch, begin_ts, end_ts);

   if (reset) {
      int handle;
      AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
         /* We should write no buffers if this is an empty batch */
         assert(agx_writer_get(ctx, handle) != batch);

         agx_bo_unreference(agx_lookup_bo(dev, handle));
      }
   } else {
      int handle;
      AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
         struct agx_bo *bo = agx_lookup_bo(dev, handle);

         /* There is no more writer on this context for anything we wrote */
         struct agx_batch *writer = agx_writer_get(ctx, handle);

         if (writer == batch)
            agx_writer_remove(ctx, handle);

         p_atomic_cmpxchg(&bo->writer_syncobj, batch->syncobj, 0);

         agx_bo_unreference(agx_lookup_bo(dev, handle));
      }
   }

   agx_bo_unreference(batch->vdm.bo);
   agx_bo_unreference(batch->cdm.bo);
   agx_pool_cleanup(&batch->pool);
   agx_pool_cleanup(&batch->pipeline_pool);

   util_dynarray_fini(&batch->scissor);
   util_dynarray_fini(&batch->depth_bias);
   util_dynarray_fini(&batch->timestamps);

   if (!(dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC))) {
      agx_batch_print_stats(dev, batch);
   }

   agx_batch_mark_complete(batch);
}
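
/*
 * Poll all submitted batches and clean up the first one the kernel reports
 * as signalled. Returns the freed slot index, or -1 if nothing has completed
 * yet.
 */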
int
agx_cleanup_batches(struct agx_context *ctx)
{
   struct agx_device *dev = agx_device(ctx->base.screen);

   unsigned i;
   unsigned count = 0;
   struct agx_batch *batches[AGX_MAX_BATCHES];
   uint32_t syncobjs[AGX_MAX_BATCHES];
   uint32_t first = 0;

   foreach_submitted(ctx, i) {
      batches[count] = &ctx->batches.slots[i];
      syncobjs[count++] = ctx->batches.slots[i].syncobj;
   }

   if (!count)
      return -1;

   int ret = drmSyncobjWait(dev->fd, syncobjs, count, 0, 0, &first);
   assert(!ret || ret == -ETIME);
   if (ret)
      return -1;

   assert(first < AGX_MAX_BATCHES);
   agx_batch_cleanup(ctx, batches[first], false);
   return agx_batch_idx(batches[first]);
}
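
/*
 * Find or create a batch for the given framebuffer state. Prefer an active
 * batch with a matching key, then a free slot, then a slot reclaimed by
 * cleaning up a completed batch; as a last resort, synchronously evict the
 * least-recently-used batch.
 */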
static struct agx_batch *
agx_get_batch_for_framebuffer(struct agx_context *ctx,
                              const struct pipe_framebuffer_state *state)
{
   /* See if we already have a matching batch */
   unsigned i;
   foreach_active(ctx, i) {
      struct agx_batch *candidate = &ctx->batches.slots[i];

      if (util_framebuffer_state_equal(&candidate->key, state)) {
         /* We found a match; bump the seqnum for the LRU
          * eviction logic.
          */
         candidate->seqnum = ++ctx->batches.seqnum;
         return candidate;
      }
   }

   /* Look for a free batch */
   for (i = 0; i < AGX_MAX_BATCHES; ++i) {
      if (!BITSET_TEST(ctx->batches.active, i) &&
          !BITSET_TEST(ctx->batches.submitted, i)) {
         struct agx_batch *batch = &ctx->batches.slots[i];
         agx_batch_init(ctx, state, batch);
         return batch;
      }
   }

   /* Try to clean up one batch */
   int freed = agx_cleanup_batches(ctx);
   if (freed >= 0) {
      struct agx_batch *batch = &ctx->batches.slots[freed];
      agx_batch_init(ctx, state, batch);
      return batch;
   }

   /* Else, evict something */
   struct agx_batch *batch = NULL;
   bool submitted = false;
   for (i = 0; i < AGX_MAX_BATCHES; ++i) {
      struct agx_batch *candidate = &ctx->batches.slots[i];
      bool cand_submitted = BITSET_TEST(ctx->batches.submitted, i);

      /* Prefer submitted batches first */
      if (!cand_submitted && submitted)
         continue;

      if (!batch || batch->seqnum > candidate->seqnum) {
         batch = candidate;
         submitted = cand_submitted;
      }
   }
   assert(batch);

   agx_sync_batch_for_reason(ctx, batch, "Too many batches");

   /* Batch is now free */
   agx_batch_init(ctx, state, batch);
   return batch;
}
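
/*
 * Get the render batch for the context's current framebuffer, creating one
 * (and dirtying all state) if there is no current batch or if the current
 * batch is a compute batch.
 */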
struct agx_batch *
agx_get_batch(struct agx_context *ctx)
{
   if (!ctx->batch || agx_batch_is_compute(ctx->batch)) {
      ctx->batch = agx_get_batch_for_framebuffer(ctx, &ctx->framebuffer);
      agx_dirty_all(ctx);
   }

   assert(util_framebuffer_state_equal(&ctx->framebuffer, &ctx->batch->key));
   return ctx->batch;
}

struct agx_batch *
agx_get_compute_batch(struct agx_context *ctx)
{
   agx_dirty_all(ctx);

   struct pipe_framebuffer_state key = {.width = AGX_COMPUTE_BATCH_WIDTH};
   ctx->batch = agx_get_batch_for_framebuffer(ctx, &key);
   return ctx->batch;
}

void
agx_flush_all(struct agx_context *ctx, const char *reason)
{
   unsigned idx;
   foreach_active(ctx, idx) {
      if (reason)
         perf_debug_ctx(ctx, "Flushing due to: %s\n", reason);

      agx_flush_batch(ctx, &ctx->batches.slots[idx]);
   }
}

void
agx_flush_batch_for_reason(struct agx_context *ctx, struct agx_batch *batch,
                           const char *reason)
{
   if (reason)
      perf_debug_ctx(ctx, "Flushing due to: %s\n", reason);

   if (agx_batch_is_active(batch))
      agx_flush_batch(ctx, batch);
}
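
/*
 * Flush (and optionally wait on) every batch that reads the given resource,
 * except for the specified batch. Used to resolve hazards against readers.
 */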
static void
agx_flush_readers_except(struct agx_context *ctx, struct agx_resource *rsrc,
                         struct agx_batch *except, const char *reason,
                         bool sync)
{
   unsigned idx;

   /* Flush everything to the hardware first */
   foreach_active(ctx, idx) {
      struct agx_batch *batch = &ctx->batches.slots[idx];

      if (batch == except)
         continue;

      if (agx_batch_uses_bo(batch, rsrc->bo)) {
         perf_debug_ctx(ctx, "Flush reader due to: %s\n", reason);
         agx_flush_batch(ctx, batch);
      }
   }

   /* Then wait on everything if necessary */
   if (sync) {
      foreach_submitted(ctx, idx) {
         struct agx_batch *batch = &ctx->batches.slots[idx];

         if (batch == except)
            continue;

         if (agx_batch_uses_bo(batch, rsrc->bo)) {
            perf_debug_ctx(ctx, "Sync reader due to: %s\n", reason);
            agx_sync_batch(ctx, batch);
         }
      }
   }
}
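
/*
 * Flush (and optionally wait on) the batch tracked as the writer of the
 * given resource, if any, except for the specified batch.
 */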
static void
agx_flush_writer_except(struct agx_context *ctx, struct agx_resource *rsrc,
                        struct agx_batch *except, const char *reason, bool sync)
{
   struct agx_batch *writer = agx_writer_get(ctx, rsrc->bo->handle);

   if (writer && writer != except &&
       (agx_batch_is_active(writer) || agx_batch_is_submitted(writer))) {
      if (agx_batch_is_active(writer) || sync) {
         perf_debug_ctx(ctx, "%s writer due to: %s\n", sync ? "Sync" : "Flush",
                        reason);
      }
      if (agx_batch_is_active(writer))
         agx_flush_batch(ctx, writer);
      /* Check for submitted state, because if the batch was a no-op it'll
       * already be cleaned up */
      if (sync && agx_batch_is_submitted(writer))
         agx_sync_batch(ctx, writer);
   }
}

bool
agx_any_batch_uses_resource(struct agx_context *ctx, struct agx_resource *rsrc)
{
   unsigned idx;
   foreach_active(ctx, idx) {
      struct agx_batch *batch = &ctx->batches.slots[idx];

      if (agx_batch_uses_bo(batch, rsrc->bo))
         return true;
   }

   foreach_submitted(ctx, idx) {
      struct agx_batch *batch = &ctx->batches.slots[idx];

      if (agx_batch_uses_bo(batch, rsrc->bo))
         return true;
   }

   return false;
}

void
agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc,
                  const char *reason)
{
   agx_flush_readers_except(ctx, rsrc, NULL, reason, false);
}

void
agx_sync_readers(struct agx_context *ctx, struct agx_resource *rsrc,
                 const char *reason)
{
   agx_flush_readers_except(ctx, rsrc, NULL, reason, true);
}

void
agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc,
                 const char *reason)
{
   agx_flush_writer_except(ctx, rsrc, NULL, reason, false);
}

void
agx_sync_writer(struct agx_context *ctx, struct agx_resource *rsrc,
                const char *reason)
{
   agx_flush_writer_except(ctx, rsrc, NULL, reason, true);
}
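
/*
 * Record that a batch reads a resource: add its BOs to the batch's BO list
 * and, unless the resource is an internal meta resource, flush any other
 * batch that writes it (read-after-write hazard).
 */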
void
agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc)
{
   agx_batch_add_bo(batch, rsrc->bo);

   if (rsrc->separate_stencil)
      agx_batch_add_bo(batch, rsrc->separate_stencil->bo);

   /* Don't hazard track fake resources internally created for meta */
   if (!rsrc->base.screen)
      return;

   /* Hazard: read-after-write */
   agx_flush_writer_except(batch->ctx, rsrc, batch, "Read from another batch",
                           false);
}
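
/*
 * Record that a batch writes a resource: flush other readers and any
 * previous writer, mark the level's data valid, and install this batch as
 * the resource's tracked writer.
 */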
static void
agx_batch_writes_internal(struct agx_batch *batch, struct agx_resource *rsrc,
                          unsigned level)
{
   struct agx_context *ctx = batch->ctx;
   struct agx_batch *writer = agx_writer_get(ctx, rsrc->bo->handle);

   assert(batch->initialized);

   agx_flush_readers_except(ctx, rsrc, batch, "Write from other batch", false);

   BITSET_SET(rsrc->data_valid, level);

   /* Nothing to do if we're already writing */
   if (writer == batch)
      return;

   /* Hazard: write-after-write, write-after-read */
   if (writer)
      agx_flush_writer(ctx, rsrc, "Multiple writers");

   /* Write is strictly stronger than a read */
   agx_batch_reads(batch, rsrc);

   writer = agx_writer_get(ctx, rsrc->bo->handle);
   assert(!writer || agx_batch_is_submitted(writer));

   /* We are now the new writer. Disregard the previous writer -- anything that
    * needs to wait for the writer going forward needs to wait for us.
    */
   agx_writer_remove(ctx, rsrc->bo->handle);
   agx_writer_add(ctx, agx_batch_idx(batch), rsrc->bo->handle);
   assert(agx_batch_is_active(batch));
}

void
agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc,
                 unsigned level)
{
   agx_batch_writes_internal(batch, rsrc, level);

   if (rsrc->base.target == PIPE_BUFFER) {
      /* Assume BOs written by the GPU are fully valid */
      rsrc->valid_buffer_range.start = 0;
      rsrc->valid_buffer_range.end = ~0;
   }
}

void
agx_batch_writes_range(struct agx_batch *batch, struct agx_resource *rsrc,
                       unsigned offset, unsigned size)
{
   assert(rsrc->base.target == PIPE_BUFFER);
   agx_batch_writes_internal(batch, rsrc, 0);
   util_range_add(&rsrc->base, &rsrc->valid_buffer_range, offset,
                  offset + size);
}
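
/*
 * If the frontend handed us an in-fence fd, import it into the context's
 * input syncobj and return that syncobj handle; otherwise return 0.
 */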
static int
agx_get_in_sync(struct agx_context *ctx)
{
   struct agx_device *dev = agx_device(ctx->base.screen);

   if (ctx->in_sync_fd >= 0) {
      int ret =
         drmSyncobjImportSyncFile(dev->fd, ctx->in_sync_obj, ctx->in_sync_fd);
      assert(!ret);

      close(ctx->in_sync_fd);
      ctx->in_sync_fd = -1;

      return ctx->in_sync_obj;
   } else {
      return 0;
   }
}

static void
agx_add_sync(struct drm_asahi_sync *syncs, unsigned *count, uint32_t handle)
{
   if (!handle)
      return;

   syncs[(*count)++] = (struct drm_asahi_sync){
      .sync_type = DRM_ASAHI_SYNC_SYNCOBJ,
      .handle = handle,
   };
}
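
/*
 * Submit a batch to the kernel. We wait on fences attached to any shared BOs
 * the batch uses (plus an explicit in-fence from the frontend, if any), and
 * afterwards attach the batch's own fence to those shared BOs so external
 * consumers of the buffers wait on our work.
 */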
void
agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
                 uint32_t barriers, enum drm_asahi_cmd_type cmd_type,
                 void *cmdbuf)
{
   struct agx_device *dev = agx_device(ctx->base.screen);
   struct agx_screen *screen = agx_screen(ctx->base.screen);

   bool feedback = dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC | AGX_DBG_STATS);

#ifndef NDEBUG
   /* Debug builds always get feedback (for fault checks) */
   feedback = true;
#endif

   if (!feedback)
      batch->result = NULL;

   /* We allocate the worst-case sync array size since this won't be excessive
    * for most workloads
    */
   unsigned max_syncs = batch->bo_list.bit_count + 1;
   unsigned in_sync_count = 0;
   unsigned shared_bo_count = 0;
   struct drm_asahi_sync *in_syncs =
      malloc(max_syncs * sizeof(struct drm_asahi_sync));
   struct agx_bo **shared_bos = malloc(max_syncs * sizeof(struct agx_bo *));

   struct drm_asahi_sync out_sync = {
      .sync_type = DRM_ASAHI_SYNC_SYNCOBJ,
      .handle = batch->syncobj,
   };

   int handle;
   AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
      struct agx_bo *bo = agx_lookup_bo(dev, handle);

      if (bo->flags & AGX_BO_SHARED) {
         batch_debug(batch, "Waits on shared BO @ 0x%" PRIx64, bo->ptr.gpu);

         /* Get a sync file fd from the buffer */
         int in_sync_fd = agx_export_sync_file(dev, bo);
         assert(in_sync_fd >= 0);

         /* Create a new syncobj */
         uint32_t sync_handle;
         int ret = drmSyncobjCreate(dev->fd, 0, &sync_handle);
         assert(ret >= 0);

         /* Import the sync file into it */
         ret = drmSyncobjImportSyncFile(dev->fd, sync_handle, in_sync_fd);
         assert(ret >= 0);
         assert(sync_handle);
         close(in_sync_fd);

         /* Add it to our wait list */
         agx_add_sync(in_syncs, &in_sync_count, sync_handle);

         /* And keep track of the BO for cloning the out_sync */
         shared_bos[shared_bo_count++] = bo;
      }
   }

   /* Add an explicit fence from gallium, if any */
   agx_add_sync(in_syncs, &in_sync_count, agx_get_in_sync(ctx));

   /* Submit! */
   /* TODO: UAPI */
   (void)screen;
   (void)out_sync;

   /* Now stash our batch fence into any shared BOs. */
   if (shared_bo_count) {
      /* Convert our handle to a sync file */
      int out_sync_fd = -1;
      int ret = drmSyncobjExportSyncFile(dev->fd, batch->syncobj, &out_sync_fd);
      assert(ret >= 0);
      assert(out_sync_fd >= 0);

      for (unsigned i = 0; i < shared_bo_count; i++) {
         batch_debug(batch, "Signals shared BO @ 0x%" PRIx64,
                     shared_bos[i]->ptr.gpu);

         /* Free the in_sync handle we just acquired */
         ret = drmSyncobjDestroy(dev->fd, in_syncs[i].handle);
         assert(ret >= 0);
         /* And then import the out_sync sync file into it */
         ret = agx_import_sync_file(dev, shared_bos[i], out_sync_fd);
         assert(ret >= 0);
      }

      close(out_sync_fd);
   }

   /* Record the syncobj on each BO we write, so it can be added post-facto as
    * a fence if the BO is exported later...
    */
   AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
      struct agx_bo *bo = agx_lookup_bo(dev, handle);
      struct agx_batch *writer = agx_writer_get(ctx, handle);

      if (!writer)
         continue;

      /* Skip BOs that are written by submitted batches; they're not ours */
      if (agx_batch_is_submitted(writer))
         continue;

      /* But any BOs written by active batches are ours */
      assert(writer == batch && "exclusive writer");
      p_atomic_set(&bo->writer_syncobj, batch->syncobj);
   }

   free(in_syncs);
   free(shared_bos);

   if (dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC | AGX_DBG_SCRATCH)) {
      if (dev->debug & AGX_DBG_TRACE) {
         /* agxdecode DRM commands */
         switch (cmd_type) {
         default:
            unreachable("Linux UAPI not yet upstream");
         }
         agxdecode_next_frame();
      }

      /* Wait so we can get errors reported back */
      int ret = drmSyncobjWait(dev->fd, &batch->syncobj, 1, INT64_MAX, 0, NULL);
      assert(!ret);

      agx_batch_print_stats(dev, batch);
   }

   agx_batch_mark_submitted(batch);

   /* Record the last syncobj for fence creation */
   ctx->syncobj = batch->syncobj;

   if (ctx->batch == batch)
      ctx->batch = NULL;

   /* Try to clean up up to two batches, to keep memory usage down */
   if (agx_cleanup_batches(ctx) >= 0)
      agx_cleanup_batches(ctx);
}
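
/*
 * Flush a batch if it is still being recorded, then wait for it to finish on
 * the CPU and clean it up.
 */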
void
agx_sync_batch(struct agx_context *ctx, struct agx_batch *batch)
{
   struct agx_device *dev = agx_device(ctx->base.screen);

   if (agx_batch_is_active(batch))
      agx_flush_batch(ctx, batch);

   /* Empty batch case, already cleaned up */
   if (!agx_batch_is_submitted(batch))
      return;

   assert(batch->syncobj);
   int ret = drmSyncobjWait(dev->fd, &batch->syncobj, 1, INT64_MAX, 0, NULL);
   assert(!ret);
   agx_batch_cleanup(ctx, batch, false);
}

void
agx_sync_batch_for_reason(struct agx_context *ctx, struct agx_batch *batch,
                          const char *reason)
{
   if (reason)
      perf_debug_ctx(ctx, "Syncing due to: %s\n", reason);

   agx_sync_batch(ctx, batch);
}

void
agx_sync_all(struct agx_context *ctx, const char *reason)
{
   if (reason)
      perf_debug_ctx(ctx, "Syncing all due to: %s\n", reason);

   unsigned idx;
   foreach_active(ctx, idx) {
      agx_flush_batch(ctx, &ctx->batches.slots[idx]);
   }

   foreach_submitted(ctx, idx) {
      agx_sync_batch(ctx, &ctx->batches.slots[idx]);
   }
}

void
agx_batch_reset(struct agx_context *ctx, struct agx_batch *batch)
{
   batch_debug(batch, "RESET");

   assert(!batch->initialized);

   /* Reset an empty batch. Like submit, but does nothing. */
   agx_batch_mark_submitted(batch);

   if (ctx->batch == batch)
      ctx->batch = NULL;

   agx_batch_cleanup(ctx, batch, true);
}

/*
 * Timestamp queries record the time after all current work is finished,
 * which we handle as the time after all current batches finish (since we're a
 * tiler and would rather not split the batch). So add a query to all active
 * batches.
 */
void
agx_add_timestamp_end_query(struct agx_context *ctx, struct agx_query *q)
{
   unsigned idx;
   foreach_active(ctx, idx) {
      agx_batch_add_timestamp_query(&ctx->batches.slots[idx], q);
   }
}

/*
 * To implement a memory barrier conservatively, flush any batch that contains
 * an incoherent memory write (requiring a memory barrier to synchronize). This
 * could be further optimized.
 */
void
agx_memory_barrier(struct pipe_context *pctx, unsigned flags)
{
   struct agx_context *ctx = agx_context(pctx);

   unsigned i;
   foreach_active(ctx, i) {
      struct agx_batch *batch = &ctx->batches.slots[i];

      if (batch->incoherent_writes)
         agx_flush_batch_for_reason(ctx, batch, "Memory barrier");
   }
}