/*
 * Copyright 2022 Alyssa Rosenzweig
 * Copyright 2019-2020 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include <xf86drm.h>
#include "asahi/lib/agx_device_virtio.h"
#include "asahi/lib/decode.h"
#include "util/bitset.h"
#include "util/u_dynarray.h"
#include "util/u_range.h"
#include "agx_state.h"
#include "vdrm.h"

#define foreach_active(ctx, idx)                                               \
   BITSET_FOREACH_SET(idx, ctx->batches.active, AGX_MAX_BATCHES)

#define foreach_submitted(ctx, idx)                                            \
   BITSET_FOREACH_SET(idx, ctx->batches.submitted, AGX_MAX_BATCHES)

#define batch_debug(batch, fmt, ...)                                           \
   do {                                                                        \
      if (unlikely(agx_device(batch->ctx->base.screen)->debug &                \
                   AGX_DBG_BATCH))                                             \
         agx_msg("[Queue %u Batch %u] " fmt "\n", batch->ctx->queue_id,        \
                 agx_batch_idx(batch), ##__VA_ARGS__);                         \
   } while (0)

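/* A batch slot moves through three states, tracked by per-context bitsets:
 * active (being recorded), submitted (handed off to the kernel), and complete
 * (neither bit set, so the slot is free for reuse).
 */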
bool
agx_batch_is_active(struct agx_batch *batch)
{
   return BITSET_TEST(batch->ctx->batches.active, agx_batch_idx(batch));
}

bool
agx_batch_is_submitted(struct agx_batch *batch)
{
   return BITSET_TEST(batch->ctx->batches.submitted, agx_batch_idx(batch));
}

static void
agx_batch_mark_active(struct agx_batch *batch)
{
   unsigned batch_idx = agx_batch_idx(batch);

   batch_debug(batch, "ACTIVE");

   assert(!BITSET_TEST(batch->ctx->batches.submitted, batch_idx));
   assert(!BITSET_TEST(batch->ctx->batches.active, batch_idx));
   BITSET_SET(batch->ctx->batches.active, batch_idx);
}

static void
agx_batch_mark_submitted(struct agx_batch *batch)
{
   unsigned batch_idx = agx_batch_idx(batch);

   batch_debug(batch, "SUBMIT");

   assert(BITSET_TEST(batch->ctx->batches.active, batch_idx));
   assert(!BITSET_TEST(batch->ctx->batches.submitted, batch_idx));
   BITSET_CLEAR(batch->ctx->batches.active, batch_idx);
   BITSET_SET(batch->ctx->batches.submitted, batch_idx);
}

static void
agx_batch_mark_complete(struct agx_batch *batch)
{
   unsigned batch_idx = agx_batch_idx(batch);

   batch_debug(batch, "COMPLETE");

   assert(!BITSET_TEST(batch->ctx->batches.active, batch_idx));
   assert(BITSET_TEST(batch->ctx->batches.submitted, batch_idx));
   BITSET_CLEAR(batch->ctx->batches.submitted, batch_idx);
}

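/* Allocate and map a fixed-size BO for encoding a hardware control stream
 * (CDM for compute, VDM for render). The returned encoder tracks the current
 * write pointer and the end of the mapping.
 */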
struct agx_encoder
agx_encoder_allocate(struct agx_batch *batch, struct agx_device *dev)
{
   struct agx_bo *bo = agx_bo_create(dev, 0x80000, 0, 0, "Encoder");
   uint8_t *map = agx_bo_map(bo);
   return (struct agx_encoder){.bo = bo, .current = map, .end = map + bo->size};
}

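/* (Re)initialize a batch slot for the given framebuffer key: reset per-batch
 * state, allocate the appropriate encoder, and mark the slot active.
 */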
static void
agx_batch_init(struct agx_context *ctx,
               const struct pipe_framebuffer_state *key,
               struct agx_batch *batch)
{
   struct agx_device *dev = agx_device(ctx->base.screen);
   struct agx_screen *screen = agx_screen(ctx->base.screen);

   batch->ctx = ctx;
   util_copy_framebuffer_state(&batch->key, key);
   batch->seqnum = ++ctx->batches.seqnum;

   agx_bo_reference(screen->rodata);
   agx_pool_init(&batch->pool, dev, "Batch pool", 0, true);
   agx_pool_init(&batch->pipeline_pool, dev, "Batch low VA pool", AGX_BO_LOW_VA,
                 true);

   /* These allocations happen only once (on first use of the slot) and are
    * just zeroed (not freed) during batch cleanup. The memory is owned by the
    * context.
    */
   if (!batch->bo_list.set) {
      batch->bo_list.set = rzalloc_array(ctx, BITSET_WORD, 128);
      batch->bo_list.bit_count = 128 * sizeof(BITSET_WORD) * 8;
   } else {
      memset(batch->bo_list.set, 0, batch->bo_list.bit_count / 8);
   }

   if (agx_batch_is_compute(batch)) {
      batch->cdm = agx_encoder_allocate(batch, dev);
      memset(&batch->vdm, 0, sizeof(batch->vdm));
   } else {
      batch->vdm = agx_encoder_allocate(batch, dev);
      memset(&batch->cdm, 0, sizeof(batch->cdm));
   }

   util_dynarray_init(&batch->scissor, ctx);
   util_dynarray_init(&batch->depth_bias, ctx);
   util_dynarray_init(&batch->timestamps, ctx);

   batch->clear = 0;
   batch->draw = 0;
   batch->load = 0;
   batch->resolve = 0;
   batch->feedback = 0;
   memset(batch->uploaded_clear_color, 0, sizeof(batch->uploaded_clear_color));
   batch->clear_depth = 0;
   batch->clear_stencil = 0;
   batch->varyings = 0;
   batch->geometry_state = 0;
   batch->initialized = false;
   batch->draws = 0;
   batch->incoherent_writes = false;
   agx_bo_unreference(dev, batch->sampler_heap.bo);
   batch->sampler_heap.bo = NULL;
   batch->sampler_heap.count = 0;
   batch->vs_scratch = false;
   batch->fs_scratch = false;
   batch->cs_scratch = false;
   batch->vs_preamble_scratch = 0;
   batch->fs_preamble_scratch = 0;
   batch->cs_preamble_scratch = 0;

   /* May get read before write, need to initialize to 0 to avoid GPU-side UAF
    * conditions.
    */
   batch->uniforms.tables[AGX_SYSVAL_TABLE_PARAMS] = 0;

   /* We need to emit prim state at the start. MESA_PRIM_COUNT matches no real
    * primitive, so the first draw always re-emits.
    */
   batch->reduced_prim = MESA_PRIM_COUNT;

   if (!batch->syncobj) {
      int ret = drmSyncobjCreate(dev->fd, 0, &batch->syncobj);
      assert(!ret && batch->syncobj);
   }

   batch->result_off =
      (2 * sizeof(union agx_batch_result)) * agx_batch_idx(batch);
   batch->result =
      (void *)(((uint8_t *)agx_bo_map(ctx->result_buf)) + batch->result_off);
   memset(batch->result, 0, sizeof(union agx_batch_result) * 2);

   agx_batch_mark_active(batch);
}

const char *status_str[] = {
   [DRM_ASAHI_STATUS_PENDING] = "(pending)",
   [DRM_ASAHI_STATUS_COMPLETE] = "Complete",
   [DRM_ASAHI_STATUS_UNKNOWN_ERROR] = "UNKNOWN ERROR",
   [DRM_ASAHI_STATUS_TIMEOUT] = "TIMEOUT",
   [DRM_ASAHI_STATUS_FAULT] = "FAULT",
   [DRM_ASAHI_STATUS_KILLED] = "KILLED",
   [DRM_ASAHI_STATUS_NO_DEVICE] = "NO DEVICE",
};

const char *fault_type_str[] = {
   [DRM_ASAHI_FAULT_NONE] = "(none)",
   [DRM_ASAHI_FAULT_UNKNOWN] = "Unknown",
   [DRM_ASAHI_FAULT_UNMAPPED] = "Unmapped",
   [DRM_ASAHI_FAULT_AF_FAULT] = "AF Fault",
   [DRM_ASAHI_FAULT_WRITE_ONLY] = "Write Only",
   [DRM_ASAHI_FAULT_READ_ONLY] = "Read Only",
   [DRM_ASAHI_FAULT_NO_ACCESS] = "No Access",
};

const char *low_unit_str[16] = {
   "DCMP", "UL1C", "CMP", "GSL1",    "IAP", "VCE",    "TE",  "RAS",
   "VDM",  "PPP",  "IPF", "IPF_CPF", "VF",  "VF_CPF", "ZLS", "UNK",
};

const char *mid_unit_str[16] = {
   "UNK",     "dPM",      "dCDM_KS0", "dCDM_KS1", "dCDM_KS2", "dIPP",
   "dIPP_CS", "dVDM_CSD", "dVDM_SSD", "dVDM_ILF", "dVDM_ILD", "dRDE0",
   "dRDE1",   "FC",       "GSL2",     "UNK",
};

const char *high_unit_str[16] = {
   "gPM_SP",         "gVDM_CSD_SP", "gVDM_SSD_SP",    "gVDM_ILF_SP",
   "gVDM_TFP_SP",    "gVDM_MMB_SP", "gCDM_CS_KS0_SP", "gCDM_CS_KS1_SP",
   "gCDM_CS_KS2_SP", "gCDM_KS0_SP", "gCDM_KS1_SP",    "gCDM_KS2_SP",
   "gIPP_SP",        "gIPP_CS_SP",  "gRDE0_SP",       "gRDE1_SP",
};

static void
agx_print_result(struct agx_device *dev, struct agx_context *ctx,
                 struct drm_asahi_result_info *info, unsigned batch_idx,
                 bool is_compute)
{
   if (unlikely(info->status != DRM_ASAHI_STATUS_COMPLETE)) {
      ctx->any_faults = true;
   }

   if (likely(info->status == DRM_ASAHI_STATUS_COMPLETE &&
              !((dev)->debug & AGX_DBG_STATS)))
      return;

   if (is_compute) {
      struct drm_asahi_result_compute *r = (void *)info;
      float time = (r->ts_end - r->ts_start) / dev->params.timer_frequency_hz;

      mesa_logw(
         "[Batch %d] Compute %s: %.06f\n", batch_idx,
         info->status < ARRAY_SIZE(status_str) ? status_str[info->status] : "?",
         time);
   } else {
      struct drm_asahi_result_render *r = (void *)info;
      float time_vtx = (r->vertex_ts_end - r->vertex_ts_start) /
                       (float)dev->params.timer_frequency_hz;
      float time_frag = (r->fragment_ts_end - r->fragment_ts_start) /
                        (float)dev->params.timer_frequency_hz;
      mesa_logw(
         "[Batch %d] Render %s: TVB %9ld/%9ld bytes (%d ovf) %c%c%c | vtx %.06f frag %.06f\n",
         batch_idx,
         info->status < ARRAY_SIZE(status_str) ? status_str[info->status] : "?",
         (long)r->tvb_usage_bytes, (long)r->tvb_size_bytes,
         (int)r->num_tvb_overflows,
         r->flags & DRM_ASAHI_RESULT_RENDER_TVB_GROW_OVF ? 'G' : ' ',
         r->flags & DRM_ASAHI_RESULT_RENDER_TVB_GROW_MIN ? 'M' : ' ',
         r->flags & DRM_ASAHI_RESULT_RENDER_TVB_OVERFLOWED ? 'O' : ' ',
         time_vtx, time_frag);
   }

   if (info->fault_type != DRM_ASAHI_FAULT_NONE) {
      const char *unit_name;
      int unit_index;

      switch (info->unit) {
      case 0x00 ... 0x9f:
         unit_name = low_unit_str[info->unit & 0xf];
         unit_index = info->unit >> 4;
         break;
      case 0xa0 ... 0xaf:
         unit_name = mid_unit_str[info->unit & 0xf];
         unit_index = 0;
         break;
      case 0xb0 ... 0xb7:
         unit_name = "GL2CC_META";
         unit_index = info->unit & 0x7;
         break;
      case 0xb8:
         unit_name = "GL2CC_MB";
         unit_index = 0;
         break;
      case 0xe0 ... 0xff:
         unit_name = high_unit_str[info->unit & 0xf];
         unit_index = (info->unit >> 4) & 1;
         break;
      default:
         unit_name = "UNK";
         unit_index = 0;
         break;
      }

      mesa_logw(
         "[Batch %d] Fault: %s : Addr 0x%llx %c Unit %02x (%s/%d) SB 0x%02x L%d Extra 0x%x\n",
         batch_idx,
         info->fault_type < ARRAY_SIZE(fault_type_str)
            ? fault_type_str[info->fault_type]
            : "?",
         (long long)info->address, info->is_read ? 'r' : 'W', info->unit,
         unit_name, unit_index, info->sideband, info->level, info->extra);

      agx_debug_fault(dev, info->address);
   }

   assert(info->status == DRM_ASAHI_STATUS_COMPLETE ||
          info->status == DRM_ASAHI_STATUS_KILLED);
}

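/* Print any recorded result feedback for the batch's compute and/or render
 * commands.
 */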
static void
agx_batch_print_stats(struct agx_device *dev, struct agx_batch *batch)
{
   unsigned batch_idx = agx_batch_idx(batch);

   if (!batch->result)
      return;

   if (batch->cdm.bo) {
      agx_print_result(dev, batch->ctx, &batch->result[0].compute.info,
                       batch_idx, true);
   }

   if (batch->vdm.bo) {
      agx_print_result(dev, batch->ctx, &batch->result[1].render.info,
                       batch_idx, false);
   }
}

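/* Tear down a batch once it has completed (or been reset): feed timestamps to
 * queries, drop BO references and writer tracking, release pools and encoders,
 * and mark the slot free.
 */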
static void
agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch, bool reset)
{
   struct agx_device *dev = agx_device(ctx->base.screen);
   struct agx_screen *screen = agx_screen(ctx->base.screen);

   assert(batch->ctx == ctx);
   assert(agx_batch_is_submitted(batch));

   assert(ctx->batch != batch);

   uint64_t begin_ts = ~0, end_ts = 0;
   if (batch->result) {
      if (batch->cdm.bo) {
         begin_ts = MIN2(begin_ts, batch->result[0].compute.ts_start);
         end_ts = MAX2(end_ts, batch->result[0].compute.ts_end);
      }

      if (batch->vdm.bo) {
         begin_ts = MIN2(begin_ts, batch->result[1].render.vertex_ts_start);
         end_ts = MAX2(end_ts, batch->result[1].render.fragment_ts_end);
      }
   }

   agx_finish_batch_queries(batch, begin_ts, end_ts);

   if (reset) {
      int handle;
      AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
         /* We should write no buffers if this is an empty batch */
         assert(agx_writer_get(ctx, handle) != batch);

         agx_bo_unreference(dev, agx_lookup_bo(dev, handle));
      }
   } else {
      int handle;
      AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
         struct agx_bo *bo = agx_lookup_bo(dev, handle);

         /* There is no more writer on this context for anything we wrote */
         struct agx_batch *writer = agx_writer_get(ctx, handle);

         if (writer == batch)
            agx_writer_remove(ctx, handle);

         p_atomic_cmpxchg(&bo->writer,
                          agx_bo_writer(ctx->queue_id, batch->syncobj), 0);

         agx_bo_unreference(dev, agx_lookup_bo(dev, handle));
      }
   }

   agx_bo_unreference(dev, screen->rodata);
   agx_bo_unreference(dev, batch->vdm.bo);
   agx_bo_unreference(dev, batch->cdm.bo);
   agx_pool_cleanup(&batch->pool);
   agx_pool_cleanup(&batch->pipeline_pool);

   util_dynarray_fini(&batch->scissor);
   util_dynarray_fini(&batch->depth_bias);
   util_dynarray_fini(&batch->timestamps);

   if (!(dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC))) {
      agx_batch_print_stats(dev, batch);
   }

   util_unreference_framebuffer_state(&batch->key);
   agx_batch_mark_complete(batch);
}

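/* Poll submitted batches without blocking and clean up the first one that has
 * already signalled. Returns its slot index, or -1 if none has completed.
 */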
int
agx_cleanup_batches(struct agx_context *ctx)
{
   struct agx_device *dev = agx_device(ctx->base.screen);

   unsigned i;
   unsigned count = 0;
   struct agx_batch *batches[AGX_MAX_BATCHES];
   uint32_t syncobjs[AGX_MAX_BATCHES];
   uint32_t first = 0;

   foreach_submitted(ctx, i) {
      batches[count] = &ctx->batches.slots[i];
      syncobjs[count++] = ctx->batches.slots[i].syncobj;
   }

   if (!count)
      return -1;

   int ret = drmSyncobjWait(dev->fd, syncobjs, count, 0, 0, &first);
   assert(!ret || ret == -ETIME);
   if (ret)
      return -1;

   assert(first < AGX_MAX_BATCHES);
   agx_batch_cleanup(ctx, batches[first], false);
   return agx_batch_idx(batches[first]);
}

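/* Find or create a batch for the given framebuffer state: reuse an active
 * batch with a matching key, else take a free slot, else reclaim a completed
 * batch, and as a last resort synchronously evict the least-recently-used
 * batch.
 */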
static struct agx_batch *
agx_get_batch_for_framebuffer(struct agx_context *ctx,
                              const struct pipe_framebuffer_state *state)
{
   /* Look if we have a matching batch */
   unsigned i;
   foreach_active(ctx, i) {
      struct agx_batch *candidate = &ctx->batches.slots[i];

      if (util_framebuffer_state_equal(&candidate->key, state)) {
         /* We found a match, increase the seqnum for the LRU
          * eviction logic.
          */
         candidate->seqnum = ++ctx->batches.seqnum;
         return candidate;
      }
   }

   /* Look for a free batch */
   for (i = 0; i < AGX_MAX_BATCHES; ++i) {
      if (!BITSET_TEST(ctx->batches.active, i) &&
          !BITSET_TEST(ctx->batches.submitted, i)) {
         struct agx_batch *batch = &ctx->batches.slots[i];
         agx_batch_init(ctx, state, batch);
         return batch;
      }
   }

   /* Try to clean up one batch */
   int freed = agx_cleanup_batches(ctx);
   if (freed >= 0) {
      struct agx_batch *batch = &ctx->batches.slots[freed];
      agx_batch_init(ctx, state, batch);
      return batch;
   }

   /* Else, evict something */
   struct agx_batch *batch = NULL;
   bool submitted = false;
   for (i = 0; i < AGX_MAX_BATCHES; ++i) {
      struct agx_batch *candidate = &ctx->batches.slots[i];
      bool cand_submitted = BITSET_TEST(ctx->batches.submitted, i);

      /* Prefer submitted batches first */
      if (!cand_submitted && submitted)
         continue;

      if (!batch || batch->seqnum > candidate->seqnum) {
         batch = candidate;
         submitted = cand_submitted;
      }
   }
   assert(batch);

   agx_sync_batch_for_reason(ctx, batch, "Too many batches");

   /* Batch is now free */
   agx_batch_init(ctx, state, batch);
   return batch;
}

struct agx_batch *
agx_get_batch(struct agx_context *ctx)
{
   if (!ctx->batch || agx_batch_is_compute(ctx->batch)) {
      ctx->batch = agx_get_batch_for_framebuffer(ctx, &ctx->framebuffer);
      agx_dirty_all(ctx);
   }

   assert(util_framebuffer_state_equal(&ctx->framebuffer, &ctx->batch->key));
   return ctx->batch;
}

struct agx_batch *
agx_get_compute_batch(struct agx_context *ctx)
{
   agx_dirty_all(ctx);

   struct pipe_framebuffer_state key = {.width = AGX_COMPUTE_BATCH_WIDTH};
   ctx->batch = agx_get_batch_for_framebuffer(ctx, &key);
   return ctx->batch;
}

void
agx_flush_all(struct agx_context *ctx, const char *reason)
{
   unsigned idx;
   foreach_active(ctx, idx) {
      if (reason)
         perf_debug_ctx(ctx, "Flushing due to: %s\n", reason);

      agx_flush_batch(ctx, &ctx->batches.slots[idx]);
   }
}

void
agx_flush_batch_for_reason(struct agx_context *ctx, struct agx_batch *batch,
                           const char *reason)
{
   if (reason)
      perf_debug_ctx(ctx, "Flushing due to: %s\n", reason);

   if (agx_batch_is_active(batch))
      agx_flush_batch(ctx, batch);
}

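/* Flush every other active batch that uses the resource's BO and, if sync is
 * set, also wait for any submitted batch that uses it.
 */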
static void
agx_flush_readers_except(struct agx_context *ctx, struct agx_resource *rsrc,
                         struct agx_batch *except, const char *reason,
                         bool sync)
{
   unsigned idx;

   /* Flush everything to the hardware first */
   foreach_active(ctx, idx) {
      struct agx_batch *batch = &ctx->batches.slots[idx];

      if (batch == except)
         continue;

      if (agx_batch_uses_bo(batch, rsrc->bo)) {
         perf_debug_ctx(ctx, "Flush reader due to: %s\n", reason);
         agx_flush_batch(ctx, batch);
      }
   }

   /* Then wait on everything if necessary */
   if (sync) {
      foreach_submitted(ctx, idx) {
         struct agx_batch *batch = &ctx->batches.slots[idx];

         if (batch == except)
            continue;

         if (agx_batch_uses_bo(batch, rsrc->bo)) {
            perf_debug_ctx(ctx, "Sync reader due to: %s\n", reason);
            agx_sync_batch(ctx, batch);
         }
      }
   }
}

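/* Flush (and, if sync is set, wait on) the batch that writes the resource's
 * BO, unless it is the excluded batch.
 */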
static void
agx_flush_writer_except(struct agx_context *ctx, struct agx_resource *rsrc,
                        struct agx_batch *except, const char *reason, bool sync)
{
   struct agx_batch *writer = agx_writer_get(ctx, rsrc->bo->handle);

   if (writer && writer != except &&
       (agx_batch_is_active(writer) || agx_batch_is_submitted(writer))) {
      if (agx_batch_is_active(writer) || sync) {
         perf_debug_ctx(ctx, "%s writer due to: %s\n", sync ? "Sync" : "Flush",
                        reason);
      }
      if (agx_batch_is_active(writer))
         agx_flush_batch(ctx, writer);
      /* Check for submitted state, because if the batch was a no-op it'll
       * already be cleaned up */
      if (sync && agx_batch_is_submitted(writer))
         agx_sync_batch(ctx, writer);
   }
}

bool
agx_any_batch_uses_resource(struct agx_context *ctx, struct agx_resource *rsrc)
{
   unsigned idx;
   foreach_active(ctx, idx) {
      struct agx_batch *batch = &ctx->batches.slots[idx];

      if (agx_batch_uses_bo(batch, rsrc->bo))
         return true;
   }

   foreach_submitted(ctx, idx) {
      struct agx_batch *batch = &ctx->batches.slots[idx];

      if (agx_batch_uses_bo(batch, rsrc->bo))
         return true;
   }

   return false;
}

void
agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc,
                  const char *reason)
{
   agx_flush_readers_except(ctx, rsrc, NULL, reason, false);
}

void
agx_sync_readers(struct agx_context *ctx, struct agx_resource *rsrc,
                 const char *reason)
{
   agx_flush_readers_except(ctx, rsrc, NULL, reason, true);
}

void
agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc,
                 const char *reason)
{
   agx_flush_writer_except(ctx, rsrc, NULL, reason, false);
}

void
agx_sync_writer(struct agx_context *ctx, struct agx_resource *rsrc,
                const char *reason)
{
   agx_flush_writer_except(ctx, rsrc, NULL, reason, true);
}

void
agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc)
{
   agx_batch_add_bo(batch, rsrc->bo);

   if (rsrc->separate_stencil)
      agx_batch_add_bo(batch, rsrc->separate_stencil->bo);

   /* Don't hazard track fake resources internally created for meta */
   if (!rsrc->base.screen)
      return;

   /* Hazard: read-after-write */
   agx_flush_writer_except(batch->ctx, rsrc, batch, "Read from another batch",
                           false);
}

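/* Track the batch as the sole writer of the resource, flushing other readers
 * and any previous writer to resolve hazards, and mark the level's data valid.
 */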
static void
agx_batch_writes_internal(struct agx_batch *batch, struct agx_resource *rsrc,
                          unsigned level)
{
   struct agx_context *ctx = batch->ctx;
   struct agx_batch *writer = agx_writer_get(ctx, rsrc->bo->handle);

   assert(batch->initialized);

   agx_flush_readers_except(ctx, rsrc, batch, "Write from other batch", false);

   BITSET_SET(rsrc->data_valid, level);

   /* Nothing to do if we're already writing */
   if (writer == batch)
      return;

   /* Hazard: write-after-write, write-after-read */
   if (writer)
      agx_flush_writer(ctx, rsrc, "Multiple writers");

   /* Write is strictly stronger than a read */
   agx_batch_reads(batch, rsrc);

   writer = agx_writer_get(ctx, rsrc->bo->handle);
   assert(!writer || agx_batch_is_submitted(writer));

   /* We are now the new writer. Disregard the previous writer -- anything that
    * needs to wait for the writer going forward needs to wait for us.
    */
   agx_writer_remove(ctx, rsrc->bo->handle);
   agx_writer_add(ctx, agx_batch_idx(batch), rsrc->bo->handle);
   assert(agx_batch_is_active(batch));
}

void
agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc,
                 unsigned level)
{
   agx_batch_writes_internal(batch, rsrc, level);

   if (rsrc->base.target == PIPE_BUFFER) {
      /* Assume BOs written by the GPU are fully valid */
      rsrc->valid_buffer_range.start = 0;
      rsrc->valid_buffer_range.end = ~0;
   }
}

void
agx_batch_writes_range(struct agx_batch *batch, struct agx_resource *rsrc,
                       unsigned offset, unsigned size)
{
   assert(rsrc->base.target == PIPE_BUFFER);
   agx_batch_writes_internal(batch, rsrc, 0);
   util_range_add(&rsrc->base, &rsrc->valid_buffer_range, offset,
                  offset + size);
}

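/* Import the context's pending in-fence fd (set by gallium) into its syncobj
 * and return that syncobj, or 0 if there is nothing to wait on.
 */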
static int
agx_get_in_sync(struct agx_context *ctx)
{
   struct agx_device *dev = agx_device(ctx->base.screen);

   if (ctx->in_sync_fd >= 0) {
      int ret =
         drmSyncobjImportSyncFile(dev->fd, ctx->in_sync_obj, ctx->in_sync_fd);
      assert(!ret);

      close(ctx->in_sync_fd);
      ctx->in_sync_fd = -1;

      return ctx->in_sync_obj;
   } else {
      return 0;
   }
}

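/* Append a syncobj to a sync array, ignoring null handles. */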
static void
agx_add_sync(struct drm_asahi_sync *syncs, unsigned *count, uint32_t handle)
{
   if (!handle)
      return;

   syncs[(*count)++] = (struct drm_asahi_sync){
      .sync_type = DRM_ASAHI_SYNC_SYNCOBJ,
      .handle = handle,
   };
}

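/* Submit the batch's compute and/or render commands to the kernel, wiring up
 * explicit and implicit synchronization, optional result feedback, and writer
 * tracking for the BOs the batch touches.
 */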
void
agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
                 struct drm_asahi_cmd_compute *compute,
                 struct drm_asahi_cmd_render *render)
{
   struct agx_device *dev = agx_device(ctx->base.screen);
   struct agx_screen *screen = agx_screen(ctx->base.screen);

   bool feedback = dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC | AGX_DBG_STATS);

#ifndef NDEBUG
   /* Debug builds always get feedback (for fault checks) */
   feedback = true;
#endif

   /* Timer queries use the feedback timestamping */
   feedback |= (batch->timestamps.size > 0);

   if (!feedback)
      batch->result = NULL;

   /* We allocate the worst-case sync array size since this won't be excessive
    * for most workloads
    */
   unsigned max_syncs = batch->bo_list.bit_count + 2;
   unsigned in_sync_count = 0;
   unsigned shared_bo_count = 0;
   struct drm_asahi_sync *in_syncs =
      malloc(max_syncs * sizeof(struct drm_asahi_sync));
   struct agx_bo **shared_bos = malloc(max_syncs * sizeof(struct agx_bo *));

   uint64_t wait_seqid = p_atomic_read(&screen->flush_wait_seqid);

   struct agx_submit_virt virt = {
      .vbo_res_id = ctx->result_buf->vbo_res_id,
   };

   /* Elide syncing against our own queue */
   if (wait_seqid && wait_seqid == ctx->flush_my_seqid) {
      batch_debug(batch,
                  "Wait sync point %" PRIu64 " is ours, waiting on %" PRIu64
                  " instead",
                  wait_seqid, ctx->flush_other_seqid);
      wait_seqid = ctx->flush_other_seqid;
   }

   uint64_t seqid = p_atomic_inc_return(&screen->flush_cur_seqid);
   assert(seqid > wait_seqid);

   batch_debug(batch, "Sync point is %" PRIu64, seqid);

   /* Subtle concurrency note: Since we assign seqids atomically and do
    * not lock submission across contexts, it is possible for two threads
    * to submit timeline syncobj updates out of order. As far as I can
    * tell, this case is handled in the kernel conservatively: it triggers
    * a fence context bump and effectively "splits" the timeline at the
    * larger point, causing future lookups for earlier points to return a
    * later point, waiting more. The signaling code still makes sure all
    * prior fences have to be signaled before considering a given point
    * signaled, regardless of order. That's good enough for us.
    *
    * (Note: this case breaks drm_syncobj_query_ioctl and for this reason
    * triggers a DRM_DEBUG message on submission, but we don't use that
    * so we don't care.)
    *
    * This case can be tested by setting seqid = 1 unconditionally here,
    * causing every single syncobj update to reuse the same timeline point.
    * Everything still works (but over-synchronizes because this effectively
    * serializes all submissions once any context flushes once).
    */
   struct drm_asahi_sync out_syncs[2] = {
      {
         .sync_type = DRM_ASAHI_SYNC_SYNCOBJ,
         .handle = batch->syncobj,
      },
      {
         .sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ,
         .handle = screen->flush_syncobj,
         .timeline_value = seqid,
      },
   };

   /* This lock protects against a subtle race scenario:
    * - Context 1 submits and registers itself as writer for a BO
    * - Context 2 runs the below loop, and finds the writer syncobj
    * - Context 1 is destroyed,
    *     - flushing all batches, unregistering itself as a writer, and
    *     - Destroying syncobjs for all batches
    * - Context 2 submits, with a now invalid syncobj ID
    *
    * Since batch syncobjs are only destroyed on context destruction, we can
    * protect against this scenario with a screen-wide rwlock to ensure that
    * the syncobj destroy code cannot run concurrently with any other
    * submission. If a submit runs before the wrlock is taken, the syncobjs
    * must still exist (even if the batch was flushed and no longer a writer).
    * If it runs after the wrlock is released, then by definition the
    * just-destroyed syncobjs cannot be writers for any BO at that point.
    *
    * A screen-wide (not device-wide) rwlock is sufficient because by definition
    * resources can only be implicitly shared within a screen. Any shared
    * resources across screens must have been imported and will go through the
    * AGX_BO_SHARED path instead, which has no race (but is slower).
    */
   u_rwlock_rdlock(&screen->destroy_lock);

   int handle;
   AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
      struct agx_bo *bo = agx_lookup_bo(dev, handle);

      if (bo->flags & AGX_BO_SHARED) {
         batch_debug(batch, "Waits on shared BO @ 0x%" PRIx64, bo->va->addr);

         /* Get a sync file fd from the buffer */
         int in_sync_fd = agx_export_sync_file(dev, bo);
         assert(in_sync_fd >= 0);

         /* Create a new syncobj */
         uint32_t sync_handle;
         int ret = drmSyncobjCreate(dev->fd, 0, &sync_handle);
         assert(ret >= 0);

         /* Import the sync file into it */
         ret = drmSyncobjImportSyncFile(dev->fd, sync_handle, in_sync_fd);
         assert(ret >= 0);
         assert(sync_handle);
         close(in_sync_fd);

         /* Add it to our wait list */
         agx_add_sync(in_syncs, &in_sync_count, sync_handle);

         /* And keep track of the BO for cloning the out_sync */
         shared_bos[shared_bo_count++] = bo;
         if (dev->is_virtio)
            virt.extres_count++;
      } else {
         /* Deal with BOs which are not externally shared, but which have been
          * written from another context within the same screen. We also need to
          * wait on these using their syncobj.
          */
         uint64_t writer = p_atomic_read_relaxed(&bo->writer);
         uint32_t queue_id = agx_bo_writer_queue(writer);
         if (writer && queue_id != ctx->queue_id) {
            batch_debug(
               batch, "Waits on inter-context BO @ 0x%" PRIx64 " from queue %u",
               bo->va->addr, queue_id);

            agx_add_sync(in_syncs, &in_sync_count,
                         agx_bo_writer_syncobj(writer));
            shared_bos[shared_bo_count++] = NULL;
         }
      }
   }

   if (dev->is_virtio && virt.extres_count) {
      struct agx_bo **p = shared_bos;
      virt.extres =
         malloc(virt.extres_count * sizeof(struct asahi_ccmd_submit_res));

      for (unsigned i = 0; i < virt.extres_count; i++) {
         while (!*p)
            p++; // Skip inter-context slots which are not recorded here
         virt.extres[i].res_id = (*p)->vbo_res_id;
         virt.extres[i].flags = ASAHI_EXTRES_READ | ASAHI_EXTRES_WRITE;
         p++;
      }
   }

   if (dev->debug & AGX_DBG_SCRATCH) {
      if (compute)
         agx_scratch_debug_pre(&ctx->scratch_cs);
      if (render) {
         agx_scratch_debug_pre(&ctx->scratch_vs);
         agx_scratch_debug_pre(&ctx->scratch_fs);
      }
   }

   /* Add an explicit fence from gallium, if any */
   agx_add_sync(in_syncs, &in_sync_count, agx_get_in_sync(ctx));

   /* Add an implicit cross-context flush sync point, if any */
   if (wait_seqid) {
      batch_debug(batch, "Waits on inter-context sync point %" PRIu64,
                  wait_seqid);
      in_syncs[in_sync_count++] = (struct drm_asahi_sync){
         .sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ,
         .handle = screen->flush_syncobj,
         .timeline_value = wait_seqid,
      };
   }

   /* Submit! */
   struct drm_asahi_command commands[2];
   unsigned command_count = 0;

   if (compute) {
      commands[command_count++] = (struct drm_asahi_command){
         .cmd_type = DRM_ASAHI_CMD_COMPUTE,
         .flags = 0,
         .cmd_buffer = (uint64_t)(uintptr_t)compute,

         /* Workaround for shipping 6.11.8 kernels, remove when we bump uapi
          */
         .cmd_buffer_size = sizeof(struct drm_asahi_cmd_compute) - 8,
         .result_offset = feedback ? batch->result_off : 0,
         .result_size = feedback ? sizeof(union agx_batch_result) : 0,
         /* Barrier on previous submission */
         .barriers = {0, 0},
      };
   }

   if (render) {
      commands[command_count++] = (struct drm_asahi_command){
         .cmd_type = DRM_ASAHI_CMD_RENDER,
         .flags = 0,
         .cmd_buffer = (uint64_t)(uintptr_t)render,
         .cmd_buffer_size = sizeof(struct drm_asahi_cmd_render),
         .result_offset =
            feedback ? (batch->result_off + sizeof(union agx_batch_result)) : 0,
         .result_size = feedback ? sizeof(union agx_batch_result) : 0,
         /* Barrier on previous submission */
         .barriers = {compute ? DRM_ASAHI_BARRIER_NONE : 0, compute ? 1 : 0},
      };
   }

   struct drm_asahi_submit submit = {
      .flags = 0,
      .queue_id = ctx->queue_id,
      .result_handle = feedback ? ctx->result_buf->handle : 0,
      .in_sync_count = in_sync_count,
      .out_sync_count = 2,
      .command_count = command_count,
      .in_syncs = (uint64_t)(uintptr_t)(in_syncs),
      .out_syncs = (uint64_t)(uintptr_t)(out_syncs),
      .commands = (uint64_t)(uintptr_t)(&commands[0]),
   };

   int ret = dev->ops.submit(dev, &submit, &virt);

   u_rwlock_rdunlock(&screen->destroy_lock);

   if (ret) {
      if (compute) {
         fprintf(stderr, "DRM_IOCTL_ASAHI_SUBMIT compute failed: %m\n");
      }

      if (render) {
         struct drm_asahi_cmd_render *c = render;
         fprintf(
            stderr,
            "DRM_IOCTL_ASAHI_SUBMIT render failed: %m (%dx%d tile %dx%d layers %d samples %d)\n",
            c->fb_width, c->fb_height, c->utile_width, c->utile_height,
            c->layers, c->samples);
      }

      assert(0);
   }

   if (ret == ENODEV)
      abort();

   /* Now stash our batch fence into any shared BOs. */
   if (shared_bo_count) {
      /* Convert our handle to a sync file */
      int out_sync_fd = -1;
      int ret = drmSyncobjExportSyncFile(dev->fd, batch->syncobj, &out_sync_fd);
      assert(ret >= 0);
      assert(out_sync_fd >= 0);

      for (unsigned i = 0; i < shared_bo_count; i++) {
         if (!shared_bos[i])
            continue;

         batch_debug(batch, "Signals shared BO @ 0x%" PRIx64,
                     shared_bos[i]->va->addr);

         /* Free the in_sync handle we just acquired */
         ret = drmSyncobjDestroy(dev->fd, in_syncs[i].handle);
         assert(ret >= 0);
         /* And then import the out_sync sync file into it */
         ret = agx_import_sync_file(dev, shared_bos[i], out_sync_fd);
         assert(ret >= 0);
      }

      close(out_sync_fd);
   }

   /* Record the syncobj on each BO we write, so it can be added post-facto as a
    * fence if the BO is exported later...
    */
   AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
      struct agx_bo *bo = agx_lookup_bo(dev, handle);
      struct agx_batch *writer = agx_writer_get(ctx, handle);

      if (!writer)
         continue;

      /* Skip BOs that are written by submitted batches, they're not ours */
      if (agx_batch_is_submitted(writer))
         continue;

      /* But any BOs written by active batches are ours */
      assert(writer == batch && "exclusive writer");
      p_atomic_set(&bo->writer, agx_bo_writer(ctx->queue_id, batch->syncobj));
      batch_debug(batch, "Writes to BO @ 0x%" PRIx64, bo->va->addr);
   }

   free(in_syncs);
   free(shared_bos);

   if (dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC | AGX_DBG_SCRATCH)) {
      if (dev->debug & AGX_DBG_TRACE) {
         if (compute) {
            agxdecode_drm_cmd_compute(dev->agxdecode, &dev->params, compute,
                                      true);
         }

         if (render) {
            agxdecode_drm_cmd_render(dev->agxdecode, &dev->params, render,
                                     true);
         }

         agxdecode_next_frame();
      }

      /* Wait so we can get errors reported back */
      int ret = drmSyncobjWait(dev->fd, &batch->syncobj, 1, INT64_MAX, 0, NULL);
      assert(!ret);

      agx_batch_print_stats(dev, batch);

      if (dev->debug & AGX_DBG_SCRATCH) {
         if (compute) {
            fprintf(stderr, "CS scratch:\n");
            agx_scratch_debug_post(&ctx->scratch_cs);
         }
         if (render) {
            fprintf(stderr, "VS scratch:\n");
            agx_scratch_debug_post(&ctx->scratch_vs);
            fprintf(stderr, "FS scratch:\n");
            agx_scratch_debug_post(&ctx->scratch_fs);
         }
      }
   }

   agx_batch_mark_submitted(batch);

   if (virt.extres)
      free(virt.extres);

   /* Record the last syncobj for fence creation */
   ctx->syncobj = batch->syncobj;

   /* Update the last seqid in the context (must only happen if the submit
    * succeeded, otherwise the timeline point would not be valid).
    */
   ctx->flush_last_seqid = seqid;

   if (ctx->batch == batch)
      ctx->batch = NULL;

   /* Try to clean up at most two batches, to keep memory usage down */
   if (agx_cleanup_batches(ctx) >= 0)
      agx_cleanup_batches(ctx);
}

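/* Flush the batch if it is still being recorded, then wait for it to finish on
 * the GPU and clean it up.
 */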
void
agx_sync_batch(struct agx_context *ctx, struct agx_batch *batch)
{
   struct agx_device *dev = agx_device(ctx->base.screen);

   if (agx_batch_is_active(batch))
      agx_flush_batch(ctx, batch);

   /* Empty batch case, already cleaned up */
   if (!agx_batch_is_submitted(batch))
      return;

   assert(batch->syncobj);
   int ret = drmSyncobjWait(dev->fd, &batch->syncobj, 1, INT64_MAX, 0, NULL);
   assert(!ret);
   agx_batch_cleanup(ctx, batch, false);
}

void
agx_sync_batch_for_reason(struct agx_context *ctx, struct agx_batch *batch,
                          const char *reason)
{
   if (reason)
      perf_debug_ctx(ctx, "Syncing due to: %s\n", reason);

   agx_sync_batch(ctx, batch);
}

void
agx_sync_all(struct agx_context *ctx, const char *reason)
{
   if (reason)
      perf_debug_ctx(ctx, "Syncing all due to: %s\n", reason);

   unsigned idx;
   foreach_active(ctx, idx) {
      agx_flush_batch(ctx, &ctx->batches.slots[idx]);
   }

   foreach_submitted(ctx, idx) {
      agx_sync_batch(ctx, &ctx->batches.slots[idx]);
   }
}

void
agx_batch_reset(struct agx_context *ctx, struct agx_batch *batch)
{
   batch_debug(batch, "RESET");

   assert(!batch->initialized);

   /* Reset an empty batch. Like submit, but does nothing. */
   agx_batch_mark_submitted(batch);

   if (ctx->batch == batch)
      ctx->batch = NULL;

   /* Elide printing stats */
   batch->result = NULL;

   agx_batch_cleanup(ctx, batch, true);
}

/*
 * Timestamp queries record the time after all current work is finished,
 * which we handle as the time after all current batches finish (since we're a
 * tiler and would rather not split the batch). So add a query to all active
 * batches.
 */
void
agx_add_timestamp_end_query(struct agx_context *ctx, struct agx_query *q)
{
   unsigned idx;
   foreach_active(ctx, idx) {
      agx_batch_add_timestamp_query(&ctx->batches.slots[idx], q);
   }
}

/*
 * To implement a memory barrier conservatively, flush any batch that contains
 * an incoherent memory write (requiring a memory barrier to synchronize). This
 * could be further optimized.
 */
void
agx_memory_barrier(struct pipe_context *pctx, unsigned flags)
{
   struct agx_context *ctx = agx_context(pctx);

   unsigned i;
   foreach_active(ctx, i) {
      struct agx_batch *batch = &ctx->batches.slots[i];

      if (batch->incoherent_writes)
         agx_flush_batch_for_reason(ctx, batch, "Memory barrier");
   }
}