/*
 * Copyright 2022 Alyssa Rosenzweig
 * Copyright 2019-2020 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include <xf86drm.h>
#include "asahi/lib/decode.h"
#include "util/bitset.h"
#include "util/u_dynarray.h"
#include "util/u_range.h"
#include "agx_state.h"

#define foreach_active(ctx, idx)                                               \
   BITSET_FOREACH_SET(idx, ctx->batches.active, AGX_MAX_BATCHES)

#define foreach_submitted(ctx, idx)                                            \
   BITSET_FOREACH_SET(idx, ctx->batches.submitted, AGX_MAX_BATCHES)

#define batch_debug(batch, fmt, ...)                                           \
   do {                                                                        \
      if (unlikely(agx_device(batch->ctx->base.screen)->debug &                \
                   AGX_DBG_BATCH))                                             \
         agx_msg("[Batch %u] " fmt "\n", agx_batch_idx(batch), ##__VA_ARGS__); \
   } while (0)

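/*
 * Batch lifecycle: a batch is marked active when its slot is (re)initialized,
 * submitted when it is handed to the kernel, and complete once its work has
 * finished and the slot has been cleaned up for reuse. The helpers below keep
 * the ctx->batches.active and ctx->batches.submitted bitsets in sync with
 * these transitions.
 */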
bool
agx_batch_is_active(struct agx_batch *batch)
{
   return BITSET_TEST(batch->ctx->batches.active, agx_batch_idx(batch));
}

bool
agx_batch_is_submitted(struct agx_batch *batch)
{
   return BITSET_TEST(batch->ctx->batches.submitted, agx_batch_idx(batch));
}

static void
agx_batch_mark_active(struct agx_batch *batch)
{
   unsigned batch_idx = agx_batch_idx(batch);

   batch_debug(batch, "ACTIVE");

   assert(!BITSET_TEST(batch->ctx->batches.submitted, batch_idx));
   assert(!BITSET_TEST(batch->ctx->batches.active, batch_idx));
   BITSET_SET(batch->ctx->batches.active, batch_idx);
}

static void
agx_batch_mark_submitted(struct agx_batch *batch)
{
   unsigned batch_idx = agx_batch_idx(batch);

   batch_debug(batch, "SUBMIT");

   assert(BITSET_TEST(batch->ctx->batches.active, batch_idx));
   assert(!BITSET_TEST(batch->ctx->batches.submitted, batch_idx));
   BITSET_CLEAR(batch->ctx->batches.active, batch_idx);
   BITSET_SET(batch->ctx->batches.submitted, batch_idx);
}

static void
agx_batch_mark_complete(struct agx_batch *batch)
{
   unsigned batch_idx = agx_batch_idx(batch);

   batch_debug(batch, "COMPLETE");

   assert(!BITSET_TEST(batch->ctx->batches.active, batch_idx));
   assert(BITSET_TEST(batch->ctx->batches.submitted, batch_idx));
   BITSET_CLEAR(batch->ctx->batches.submitted, batch_idx);
}

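/*
 * Allocate a fixed-size (512 KiB) buffer object to encode hardware commands
 * into, returning an encoder that points at its CPU mapping.
 */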
struct agx_encoder
agx_encoder_allocate(struct agx_batch *batch, struct agx_device *dev)
{
   struct agx_bo *bo = agx_bo_create(dev, 0x80000, 0, "Encoder");

   return (struct agx_encoder){
      .bo = bo,
      .current = bo->ptr.cpu,
      .end = (uint8_t *)bo->ptr.cpu + bo->size,
   };
}

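/*
 * (Re)initialize a batch slot for the given framebuffer key: set up the pools
 * and BO list, allocate a VDM (render) or CDM (compute) encoder as
 * appropriate, reset all per-batch state, create the syncobj on first use,
 * and mark the batch active.
 */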
static void
agx_batch_init(struct agx_context *ctx,
               const struct pipe_framebuffer_state *key,
               struct agx_batch *batch)
{
   struct agx_device *dev = agx_device(ctx->base.screen);

   batch->ctx = ctx;
   util_copy_framebuffer_state(&batch->key, key);
   batch->seqnum = ++ctx->batches.seqnum;

   agx_pool_init(&batch->pool, dev, 0, true);
   agx_pool_init(&batch->pipeline_pool, dev, AGX_BO_LOW_VA, true);

   /* These allocations can happen only once and will just be zeroed (not freed)
    * during batch clean up. The memory is owned by the context.
    */
   if (!batch->bo_list.set) {
      batch->bo_list.set = rzalloc_array(ctx, BITSET_WORD, 128);
      batch->bo_list.bit_count = 128 * sizeof(BITSET_WORD) * 8;
   } else {
      memset(batch->bo_list.set, 0, batch->bo_list.bit_count / 8);
   }

   if (agx_batch_is_compute(batch)) {
      batch->cdm = agx_encoder_allocate(batch, dev);
      memset(&batch->vdm, 0, sizeof(batch->vdm));
   } else {
      batch->vdm = agx_encoder_allocate(batch, dev);
      memset(&batch->cdm, 0, sizeof(batch->cdm));
   }

   util_dynarray_init(&batch->scissor, ctx);
   util_dynarray_init(&batch->depth_bias, ctx);
   util_dynarray_init(&batch->timestamps, ctx);

   batch->clear = 0;
   batch->draw = 0;
   batch->load = 0;
   batch->resolve = 0;
   memset(batch->uploaded_clear_color, 0, sizeof(batch->uploaded_clear_color));
   batch->clear_depth = 0;
   batch->clear_stencil = 0;
   batch->varyings = 0;
   batch->geometry_state = 0;
   batch->initialized = false;
   batch->draws = 0;
   batch->incoherent_writes = false;
   agx_bo_unreference(batch->sampler_heap.bo);
   batch->sampler_heap.bo = NULL;
   batch->sampler_heap.count = 0;
   batch->vs_scratch = false;
   batch->fs_scratch = false;
   batch->cs_scratch = false;
   batch->vs_preamble_scratch = 0;
   batch->fs_preamble_scratch = 0;
   batch->cs_preamble_scratch = 0;

   /* We need to emit prim state at the start. Max collides with all. */
   batch->reduced_prim = MESA_PRIM_COUNT;

   if (!batch->syncobj) {
      int ret = drmSyncobjCreate(dev->fd, 0, &batch->syncobj);
      assert(!ret && batch->syncobj);
   }

   agx_batch_mark_active(batch);
}

static void
agx_batch_print_stats(struct agx_device *dev, struct agx_batch *batch)
{
   unreachable("Linux UAPI not yet upstream");
}

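/*
 * Release a submitted batch's resources once we are done with it: finish its
 * queries, drop the references it holds on its BOs (and, unless resetting,
 * clear writer tracking for them), free the encoders and pools, and mark the
 * slot complete so it can be reused.
 */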
static void
agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch, bool reset)
{
   struct agx_device *dev = agx_device(ctx->base.screen);
   assert(batch->ctx == ctx);
   assert(agx_batch_is_submitted(batch));

   assert(ctx->batch != batch);

   uint64_t begin_ts = ~0, end_ts = 0;
   /* TODO: UAPI pending */
   agx_finish_batch_queries(batch, begin_ts, end_ts);

   if (reset) {
      int handle;
      AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
         /* We should write no buffers if this is an empty batch */
         assert(agx_writer_get(ctx, handle) != batch);

         agx_bo_unreference(agx_lookup_bo(dev, handle));
      }
   } else {
      int handle;
      AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
         struct agx_bo *bo = agx_lookup_bo(dev, handle);

         /* There is no more writer on this context for anything we wrote */
         struct agx_batch *writer = agx_writer_get(ctx, handle);

         if (writer == batch)
            agx_writer_remove(ctx, handle);

         p_atomic_cmpxchg(&bo->writer_syncobj, batch->syncobj, 0);

         agx_bo_unreference(agx_lookup_bo(dev, handle));
      }
   }

   agx_bo_unreference(batch->vdm.bo);
   agx_bo_unreference(batch->cdm.bo);
   agx_pool_cleanup(&batch->pool);
   agx_pool_cleanup(&batch->pipeline_pool);

   util_dynarray_fini(&batch->scissor);
   util_dynarray_fini(&batch->depth_bias);
   util_dynarray_fini(&batch->timestamps);

   if (!(dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC))) {
      agx_batch_print_stats(dev, batch);
   }

   /* Clear the submitted bit so the slot can be reused */
   agx_batch_mark_complete(batch);
}

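/*
 * Without blocking, clean up the first submitted batch whose work has already
 * completed. Returns the index of the freed batch slot, or -1 if none has
 * finished yet.
 */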
int
agx_cleanup_batches(struct agx_context *ctx)
{
   struct agx_device *dev = agx_device(ctx->base.screen);

   unsigned i;
   unsigned count = 0;
   struct agx_batch *batches[AGX_MAX_BATCHES];
   uint32_t syncobjs[AGX_MAX_BATCHES];
   uint32_t first = 0;

   foreach_submitted(ctx, i) {
      batches[count] = &ctx->batches.slots[i];
      syncobjs[count++] = ctx->batches.slots[i].syncobj;
   }

   if (!count)
      return -1;

   int ret = drmSyncobjWait(dev->fd, syncobjs, count, 0, 0, &first);
   assert(!ret || ret == -ETIME);
   if (ret)
      return -1;

   assert(first < AGX_MAX_BATCHES);
   agx_batch_cleanup(ctx, batches[first], false);
   return agx_batch_idx(batches[first]);
}

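/*
 * Find a batch to render into the given framebuffer: reuse an active batch
 * with a matching key if one exists, else take a free slot, else reclaim a
 * slot whose work already finished, and as a last resort synchronously flush
 * the least-recently-used batch (preferring already-submitted ones) and reuse
 * its slot.
 */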
static struct agx_batch *
agx_get_batch_for_framebuffer(struct agx_context *ctx,
                              const struct pipe_framebuffer_state *state)
{
   /* Look if we have a matching batch */
   unsigned i;
   foreach_active(ctx, i) {
      struct agx_batch *candidate = &ctx->batches.slots[i];

      if (util_framebuffer_state_equal(&candidate->key, state)) {
         /* We found a match, increase the seqnum for the LRU
          * eviction logic.
          */
         candidate->seqnum = ++ctx->batches.seqnum;
         return candidate;
      }
   }

   /* Look for a free batch */
   for (i = 0; i < AGX_MAX_BATCHES; ++i) {
      if (!BITSET_TEST(ctx->batches.active, i) &&
          !BITSET_TEST(ctx->batches.submitted, i)) {
         struct agx_batch *batch = &ctx->batches.slots[i];
         agx_batch_init(ctx, state, batch);
         return batch;
      }
   }

   /* Try to clean up one batch */
   int freed = agx_cleanup_batches(ctx);
   if (freed >= 0) {
      struct agx_batch *batch = &ctx->batches.slots[freed];
      agx_batch_init(ctx, state, batch);
      return batch;
   }

   /* Else, evict something */
   struct agx_batch *batch = NULL;
   bool submitted = false;
   for (i = 0; i < AGX_MAX_BATCHES; ++i) {
      struct agx_batch *candidate = &ctx->batches.slots[i];
      bool cand_submitted = BITSET_TEST(ctx->batches.submitted, i);

      /* Prefer submitted batches first */
      if (!cand_submitted && submitted)
         continue;

      if (!batch || batch->seqnum > candidate->seqnum) {
         batch = candidate;
         submitted = cand_submitted;
      }
   }
   assert(batch);

   agx_sync_batch_for_reason(ctx, batch, "Too many batches");

   /* Batch is now free */
   agx_batch_init(ctx, state, batch);
   return batch;
}

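/*
 * Return the batch to record render work into, creating one for the currently
 * bound framebuffer if there is no batch or if the current batch is a compute
 * batch. All state is dirtied when switching batches.
 */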
struct agx_batch *
agx_get_batch(struct agx_context *ctx)
{
   if (!ctx->batch || agx_batch_is_compute(ctx->batch)) {
      ctx->batch = agx_get_batch_for_framebuffer(ctx, &ctx->framebuffer);
      agx_dirty_all(ctx);
   }

   assert(util_framebuffer_state_equal(&ctx->framebuffer, &ctx->batch->key));
   return ctx->batch;
}

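/*
 * Compute work does not target a real framebuffer, so it keys its batch off a
 * sentinel framebuffer state (AGX_COMPUTE_BATCH_WIDTH) instead.
 */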
struct agx_batch *
agx_get_compute_batch(struct agx_context *ctx)
{
   agx_dirty_all(ctx);

   struct pipe_framebuffer_state key = {.width = AGX_COMPUTE_BATCH_WIDTH};
   ctx->batch = agx_get_batch_for_framebuffer(ctx, &key);
   return ctx->batch;
}

void
agx_flush_all(struct agx_context *ctx, const char *reason)
{
   unsigned idx;
   foreach_active(ctx, idx) {
      if (reason)
         perf_debug_ctx(ctx, "Flushing due to: %s\n", reason);

      agx_flush_batch(ctx, &ctx->batches.slots[idx]);
   }
}

void
agx_flush_batch_for_reason(struct agx_context *ctx, struct agx_batch *batch,
                           const char *reason)
{
   if (reason)
      perf_debug_ctx(ctx, "Flushing due to: %s\n", reason);

   if (agx_batch_is_active(batch))
      agx_flush_batch(ctx, batch);
}

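/*
 * Flush every active batch other than `except` that reads the resource's BO,
 * and if `sync` is set, additionally wait for all such submitted batches to
 * finish. Used to resolve hazards against batches that read a resource.
 */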
static void
agx_flush_readers_except(struct agx_context *ctx, struct agx_resource *rsrc,
                         struct agx_batch *except, const char *reason,
                         bool sync)
{
   unsigned idx;

   /* Flush everything to the hardware first */
   foreach_active(ctx, idx) {
      struct agx_batch *batch = &ctx->batches.slots[idx];

      if (batch == except)
         continue;

      if (agx_batch_uses_bo(batch, rsrc->bo)) {
         perf_debug_ctx(ctx, "Flush reader due to: %s\n", reason);
         agx_flush_batch(ctx, batch);
      }
   }

   /* Then wait on everything if necessary */
   if (sync) {
      foreach_submitted(ctx, idx) {
         struct agx_batch *batch = &ctx->batches.slots[idx];

         if (batch == except)
            continue;

         if (agx_batch_uses_bo(batch, rsrc->bo)) {
            perf_debug_ctx(ctx, "Sync reader due to: %s\n", reason);
            agx_sync_batch(ctx, batch);
         }
      }
   }
}

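/*
 * Flush the batch that writes the resource (if any, and if it is not
 * `except`), waiting for it to finish when `sync` is set. Used to resolve
 * hazards against the tracked writer of a resource.
 */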
static void
agx_flush_writer_except(struct agx_context *ctx, struct agx_resource *rsrc,
                        struct agx_batch *except, const char *reason, bool sync)
{
   struct agx_batch *writer = agx_writer_get(ctx, rsrc->bo->handle);

   if (writer && writer != except &&
       (agx_batch_is_active(writer) || agx_batch_is_submitted(writer))) {
      if (agx_batch_is_active(writer) || sync) {
         perf_debug_ctx(ctx, "%s writer due to: %s\n", sync ? "Sync" : "Flush",
                        reason);
      }
      if (agx_batch_is_active(writer))
         agx_flush_batch(ctx, writer);
      /* Check for submitted state, because if the batch was a no-op it'll
       * already be cleaned up */
      if (sync && agx_batch_is_submitted(writer))
         agx_sync_batch(ctx, writer);
   }
}

bool
agx_any_batch_uses_resource(struct agx_context *ctx, struct agx_resource *rsrc)
{
   unsigned idx;
   foreach_active(ctx, idx) {
      struct agx_batch *batch = &ctx->batches.slots[idx];

      if (agx_batch_uses_bo(batch, rsrc->bo))
         return true;
   }

   foreach_submitted(ctx, idx) {
      struct agx_batch *batch = &ctx->batches.slots[idx];

      if (agx_batch_uses_bo(batch, rsrc->bo))
         return true;
   }

   return false;
}

void
agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc,
                  const char *reason)
{
   agx_flush_readers_except(ctx, rsrc, NULL, reason, false);
}

void
agx_sync_readers(struct agx_context *ctx, struct agx_resource *rsrc,
                 const char *reason)
{
   agx_flush_readers_except(ctx, rsrc, NULL, reason, true);
}

void
agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc,
                 const char *reason)
{
   agx_flush_writer_except(ctx, rsrc, NULL, reason, false);
}

void
agx_sync_writer(struct agx_context *ctx, struct agx_resource *rsrc,
                const char *reason)
{
   agx_flush_writer_except(ctx, rsrc, NULL, reason, true);
}

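/*
 * Record that a batch reads a resource: add its BO(s) to the batch's BO list
 * and flush any other batch that writes it (read-after-write hazard).
 */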
void
agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc)
{
   agx_batch_add_bo(batch, rsrc->bo);

   if (rsrc->separate_stencil)
      agx_batch_add_bo(batch, rsrc->separate_stencil->bo);

   /* Don't hazard track fake resources internally created for meta */
   if (!rsrc->base.screen)
      return;

   /* Hazard: read-after-write */
   agx_flush_writer_except(batch->ctx, rsrc, batch, "Read from another batch",
                           false);
}

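/*
 * Record that a batch writes a resource, making it the sole tracked writer:
 * flush other readers and any previous writer, mark the written level valid,
 * record the read (a write implies a read), and point the context's writer
 * map at this batch.
 */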
static void
agx_batch_writes_internal(struct agx_batch *batch, struct agx_resource *rsrc,
                          unsigned level)
{
   struct agx_context *ctx = batch->ctx;
   struct agx_batch *writer = agx_writer_get(ctx, rsrc->bo->handle);

   assert(batch->initialized);

   agx_flush_readers_except(ctx, rsrc, batch, "Write from other batch", false);

   BITSET_SET(rsrc->data_valid, level);

   /* Nothing to do if we're already writing */
   if (writer == batch)
      return;

   /* Hazard: write-after-write, write-after-read */
   if (writer)
      agx_flush_writer(ctx, rsrc, "Multiple writers");

   /* Write is strictly stronger than a read */
   agx_batch_reads(batch, rsrc);

   writer = agx_writer_get(ctx, rsrc->bo->handle);
   assert(!writer || agx_batch_is_submitted(writer));

   /* We are now the new writer. Disregard the previous writer -- anything that
    * needs to wait for the writer going forward needs to wait for us.
    */
   agx_writer_remove(ctx, rsrc->bo->handle);
   agx_writer_add(ctx, agx_batch_idx(batch), rsrc->bo->handle);
   assert(agx_batch_is_active(batch));
}

void
agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc,
                 unsigned level)
{
   agx_batch_writes_internal(batch, rsrc, level);

   if (rsrc->base.target == PIPE_BUFFER) {
      /* Assume BOs written by the GPU are fully valid */
      rsrc->valid_buffer_range.start = 0;
      rsrc->valid_buffer_range.end = ~0;
   }
}

void
agx_batch_writes_range(struct agx_batch *batch, struct agx_resource *rsrc,
                       unsigned offset, unsigned size)
{
   assert(rsrc->base.target == PIPE_BUFFER);
   agx_batch_writes_internal(batch, rsrc, 0);
   util_range_add(&rsrc->base, &rsrc->valid_buffer_range, offset,
                  offset + size);
}

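/*
 * If gallium handed us an in-fence file descriptor, import it into the
 * context's input syncobj and return that syncobj handle so the next submit
 * waits on it; otherwise return 0 (nothing to wait on).
 */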
static int
agx_get_in_sync(struct agx_context *ctx)
{
   struct agx_device *dev = agx_device(ctx->base.screen);

   if (ctx->in_sync_fd >= 0) {
      int ret =
         drmSyncobjImportSyncFile(dev->fd, ctx->in_sync_obj, ctx->in_sync_fd);
      assert(!ret);

      close(ctx->in_sync_fd);
      ctx->in_sync_fd = -1;

      return ctx->in_sync_obj;
   } else {
      return 0;
   }
}

static void
agx_add_sync(struct drm_asahi_sync *syncs, unsigned *count, uint32_t handle)
{
   if (!handle)
      return;

   syncs[(*count)++] = (struct drm_asahi_sync){
      .sync_type = DRM_ASAHI_SYNC_SYNCOBJ,
      .handle = handle,
   };
}

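/*
 * Submit a batch to the kernel (the actual submission ioctl is stubbed out
 * pending the upstream UAPI). Gathers wait syncobjs for shared BOs and the
 * gallium in-fence, uses the batch's syncobj as the out-sync, propagates that
 * fence back onto shared and written BOs, then marks the batch submitted and
 * opportunistically cleans up finished batches.
 */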
void
agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
                 uint32_t barriers, enum drm_asahi_cmd_type cmd_type,
                 void *cmdbuf)
{
   struct agx_device *dev = agx_device(ctx->base.screen);
   struct agx_screen *screen = agx_screen(ctx->base.screen);

   bool feedback = dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC | AGX_DBG_STATS);

#ifndef NDEBUG
   /* Debug builds always get feedback (for fault checks) */
   feedback = true;
#endif

   if (!feedback)
      batch->result = NULL;

   /* We allocate the worst-case sync array size since this won't be excessive
    * for most workloads
    */
   unsigned max_syncs = batch->bo_list.bit_count + 1;
   unsigned in_sync_count = 0;
   unsigned shared_bo_count = 0;
   struct drm_asahi_sync *in_syncs =
      malloc(max_syncs * sizeof(struct drm_asahi_sync));
   struct agx_bo **shared_bos = malloc(max_syncs * sizeof(struct agx_bo *));

   struct drm_asahi_sync out_sync = {
      .sync_type = DRM_ASAHI_SYNC_SYNCOBJ,
      .handle = batch->syncobj,
   };

   int handle;
   AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
      struct agx_bo *bo = agx_lookup_bo(dev, handle);

      if (bo->flags & AGX_BO_SHARED) {
         batch_debug(batch, "Waits on shared BO @ 0x%" PRIx64, bo->ptr.gpu);

         /* Get a sync file fd from the buffer */
         int in_sync_fd = agx_export_sync_file(dev, bo);
         assert(in_sync_fd >= 0);

         /* Create a new syncobj */
         uint32_t sync_handle;
         int ret = drmSyncobjCreate(dev->fd, 0, &sync_handle);
         assert(ret >= 0);

         /* Import the sync file into it */
         ret = drmSyncobjImportSyncFile(dev->fd, sync_handle, in_sync_fd);
         assert(ret >= 0);
         assert(sync_handle);
         close(in_sync_fd);

         /* Add it to our wait list */
         agx_add_sync(in_syncs, &in_sync_count, sync_handle);

         /* And keep track of the BO for cloning the out_sync */
         shared_bos[shared_bo_count++] = bo;
      }
   }

   /* Add an explicit fence from gallium, if any */
   agx_add_sync(in_syncs, &in_sync_count, agx_get_in_sync(ctx));

   /* Submit! */
   /* TODO: UAPI */
   (void)screen;
   (void)out_sync;

   /* Now stash our batch fence into any shared BOs. */
   if (shared_bo_count) {
      /* Convert our handle to a sync file */
      int out_sync_fd = -1;
      int ret = drmSyncobjExportSyncFile(dev->fd, batch->syncobj, &out_sync_fd);
      assert(ret >= 0);
      assert(out_sync_fd >= 0);

      for (unsigned i = 0; i < shared_bo_count; i++) {
         batch_debug(batch, "Signals shared BO @ 0x%" PRIx64,
                     shared_bos[i]->ptr.gpu);

         /* Free the in_sync handle we just acquired */
         ret = drmSyncobjDestroy(dev->fd, in_syncs[i].handle);
         assert(ret >= 0);
         /* And then import the out_sync sync file into it */
         ret = agx_import_sync_file(dev, shared_bos[i], out_sync_fd);
         assert(ret >= 0);
      }

      close(out_sync_fd);
   }

   /* Record the syncobj on each BO we write, so it can be added post-facto as a
    * fence if the BO is exported later...
    */
   AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
      struct agx_bo *bo = agx_lookup_bo(dev, handle);
      struct agx_batch *writer = agx_writer_get(ctx, handle);

      if (!writer)
         continue;

      /* Skip BOs that are written by submitted batches, they're not ours */
      if (agx_batch_is_submitted(writer))
         continue;

      /* But any BOs written by active batches are ours */
      assert(writer == batch && "exclusive writer");
      p_atomic_set(&bo->writer_syncobj, batch->syncobj);
   }

   free(in_syncs);
   free(shared_bos);

   if (dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC | AGX_DBG_SCRATCH)) {
      if (dev->debug & AGX_DBG_TRACE) {
         /* agxdecode DRM commands */
         switch (cmd_type) {
         default:
            unreachable("Linux UAPI not yet upstream");
         }
         agxdecode_next_frame();
      }

      /* Wait so we can get errors reported back */
      int ret = drmSyncobjWait(dev->fd, &batch->syncobj, 1, INT64_MAX, 0, NULL);
      assert(!ret);

      agx_batch_print_stats(dev, batch);
   }

   agx_batch_mark_submitted(batch);

   /* Record the last syncobj for fence creation */
   ctx->syncobj = batch->syncobj;

   if (ctx->batch == batch)
      ctx->batch = NULL;

   /* Try to clean up up to two batches, to keep memory usage down */
   if (agx_cleanup_batches(ctx) >= 0)
      agx_cleanup_batches(ctx);
}

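/*
 * Wait for a batch to finish on the GPU: flush it first if it is still being
 * recorded, then block on its syncobj and clean it up. Batches that were
 * reset as empty are already cleaned up and need no wait.
 */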
void
agx_sync_batch(struct agx_context *ctx, struct agx_batch *batch)
{
   struct agx_device *dev = agx_device(ctx->base.screen);

   if (agx_batch_is_active(batch))
      agx_flush_batch(ctx, batch);

   /* Empty batch case, already cleaned up */
   if (!agx_batch_is_submitted(batch))
      return;

   assert(batch->syncobj);
   int ret = drmSyncobjWait(dev->fd, &batch->syncobj, 1, INT64_MAX, 0, NULL);
   assert(!ret);
   agx_batch_cleanup(ctx, batch, false);
}

void
agx_sync_batch_for_reason(struct agx_context *ctx, struct agx_batch *batch,
                          const char *reason)
{
   if (reason)
      perf_debug_ctx(ctx, "Syncing due to: %s\n", reason);

   agx_sync_batch(ctx, batch);
}

void
agx_sync_all(struct agx_context *ctx, const char *reason)
{
   if (reason)
      perf_debug_ctx(ctx, "Syncing all due to: %s\n", reason);

   unsigned idx;
   foreach_active(ctx, idx) {
      agx_flush_batch(ctx, &ctx->batches.slots[idx]);
   }

   foreach_submitted(ctx, idx) {
      agx_sync_batch(ctx, &ctx->batches.slots[idx]);
   }
}

void
agx_batch_reset(struct agx_context *ctx, struct agx_batch *batch)
{
   batch_debug(batch, "RESET");

   assert(!batch->initialized);

   /* Reset an empty batch. Like submit, but does nothing. */
   agx_batch_mark_submitted(batch);

   if (ctx->batch == batch)
      ctx->batch = NULL;

   agx_batch_cleanup(ctx, batch, true);
}

/*
 * Timestamp queries record the time after all current work is finished,
 * which we handle as the time after all current batches finish (since we're a
 * tiler and would rather not split the batch). So add a query to all active
 * batches.
 */
void
agx_add_timestamp_end_query(struct agx_context *ctx, struct agx_query *q)
{
   unsigned idx;
   foreach_active(ctx, idx) {
      agx_batch_add_timestamp_query(&ctx->batches.slots[idx], q);
   }
}

/*
 * To implement a memory barrier conservatively, flush any batch that contains
 * an incoherent memory write (requiring a memory barrier to synchronize). This
 * could be further optimized.
 */
void
agx_memory_barrier(struct pipe_context *pctx, unsigned flags)
{
   struct agx_context *ctx = agx_context(pctx);

   unsigned i;
   foreach_active(ctx, i) {
      struct agx_batch *batch = &ctx->batches.slots[i];

      if (batch->incoherent_writes)
         agx_flush_batch_for_reason(ctx, batch, "Memory barrier");
   }
}