/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <assert.h>
#include <inttypes.h>
#include <pthread.h>

#include "util/hash_table.h"
#include "util/libsync.h"
#include "util/os_file.h"
#include "util/slab.h"

#include "freedreno_ringbuffer_sp.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table).
 */

/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on,
 * so we use a condition variable instead.  Note that pipe->flush() is not
 * expected to be a common/hot path.
 */
static pthread_cond_t  flush_cnd = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER;

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags);

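/* Track a sub-allocated bo in the submit's suballoc_bos table.  The cached
 * bo->idx is used as a fast path; on a miss we fall back to the hash table.
 * With check==true this only verifies that the bo is already tracked
 * (returning false if it is not) and never appends a new entry.
 */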
static inline bool
check_append_suballoc_bo(struct fd_submit_sp *submit, struct fd_bo *bo, bool check)
{
   uint32_t idx = READ_ONCE(bo->idx);

   if (unlikely((idx >= submit->nr_suballoc_bos) ||
       (submit->suballoc_bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(
            submit->suballoc_bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else if (unlikely(check)) {
         return false;
      } else {
         idx = APPEND(submit, suballoc_bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(
               submit->suballoc_bo_table, hash, bo, (void *)(uintptr_t)idx);
      }
      bo->idx = idx;
   }

   return true;
}

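/* Add a bo to the submit's bos table (if not already present) and return its
 * index.  Sub-allocated bos are tracked separately and resolved to their
 * backing heap block.  With check==true, no new entries are added and ~0 is
 * returned if the bo is not already attached to the submit.
 */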
static inline uint32_t
check_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo, bool check)
{
   if (suballoc_bo(bo)) {
      if (check) {
         if (!check_append_suballoc_bo(submit, bo, true)) {
            return ~0;
         }
         bo = fd_bo_heap_block(bo);
      } else {
         check_append_suballoc_bo(submit, bo, false);
         bo = fd_bo_heap_block(bo);
      }
   }

   /* NOTE: it is legal to use the same bo on different threads for
    * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
   uint32_t idx = READ_ONCE(bo->idx);

   if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else if (unlikely(check)) {
         return ~0;
      } else {
         idx = APPEND(submit, bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
                                            (void *)(uintptr_t)idx);
      }
      bo->idx = idx;
   }

   return idx;
}

/* add (if needed) bo to submit and return index: */
uint32_t
fd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo)
{
   return check_append_bo(submit, bo, false);
}

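/* Allocate a ringbuffer from the submit's suballocation stream: either carve
 * the next aligned range out of the current suballoc ring's bo, or start a
 * new SUBALLOC_SIZE bo if the requested size doesn't fit.
 */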
static void
fd_submit_suballoc_ring_bo(struct fd_submit *submit,
                           struct fd_ringbuffer_sp *fd_ring, uint32_t size)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (fd_submit->suballoc_ring) {
      struct fd_ringbuffer_sp *suballoc_ring =
         to_fd_ringbuffer_sp(fd_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset =
         fd_ringbuffer_size(fd_submit->suballoc_ring) + suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, SUBALLOC_ALIGNMENT);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE);
      fd_ring->offset = 0;
   } else {
      fd_ring->ring_bo = fd_bo_ref(suballoc_bo);
      fd_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = fd_submit->suballoc_ring;

   fd_submit->suballoc_ring = fd_ringbuffer_ref(&fd_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}

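/* Create a new ringbuffer for the submit.  STREAMING ringbuffers are
 * suballocated; otherwise a dedicated ring bo is allocated (GROWABLE rings
 * start at SUBALLOC_SIZE and are resized in fd_ringbuffer_sp_grow()).
 */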
static struct fd_ringbuffer *
fd_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
                            enum fd_ringbuffer_flags flags)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_ringbuffer_sp *fd_ring;

   fd_ring = slab_alloc(&fd_submit->ring_pool);

   fd_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   fd_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      fd_submit_suballoc_ring_bo(submit, fd_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = SUBALLOC_SIZE;

      fd_ring->offset = 0;
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
   }

   if (!fd_ringbuffer_sp_init(fd_ring, size, flags))
      return NULL;

   return &fd_ring->base;
}

/**
 * Prepare submit for flush, always done synchronously.
 *
 * 1) Finalize the primary ringbuffer; at this point no more cmdstream may
 *    be written into it, since from the PoV of the upper-level driver the
 *    submit is flushed, even if deferred
 * 2) Add cmdstream bos to bos table
 * 3) Update bo fences
 */
static bool
fd_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
                        struct fd_fence *out_fence)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   bool has_shared = false;

   finalize_current_cmd(submit->primary);

   struct fd_ringbuffer_sp *primary =
      to_fd_ringbuffer_sp(submit->primary);

   for (unsigned i = 0; i < primary->u.nr_cmds; i++)
      fd_submit_append_bo(fd_submit, primary->u.cmds[i].ring_bo);

   out_fence->ufence = submit->fence;

   simple_mtx_lock(&fence_lock);
   for (unsigned i = 0; i < fd_submit->nr_bos; i++) {
      fd_bo_add_fence(fd_submit->bos[i], out_fence);
      has_shared |= fd_submit->bos[i]->alloc_flags & FD_BO_SHARED;
   }
   for (unsigned i = 0; i < fd_submit->nr_suballoc_bos; i++) {
      fd_bo_add_fence(fd_submit->suballoc_bos[i], out_fence);
   }
   simple_mtx_unlock(&fence_lock);

   fd_submit->out_fence   = fd_fence_ref(out_fence);
   fd_submit->in_fence_fd = (in_fence_fd == -1) ?
         -1 : os_dupfd_cloexec(in_fence_fd);

   return has_shared;
}

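/* Runs on the submit queue thread (or synchronously, if the device is not
 * using threaded submit): flush the merged submit list to the kernel and
 * advance last_submit_fence so fd_pipe_sp_flush() waiters can make progress.
 */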
static void
fd_submit_sp_flush_execute(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_pipe *pipe = submit->pipe;

   fd_submit->flush_submit_list(&fd_submit->submit_list);

   pthread_mutex_lock(&flush_mtx);
   assert(fd_fence_before(pipe->last_submit_fence, fd_submit->base.fence));
   pipe->last_submit_fence = fd_submit->base.fence;
   pthread_cond_broadcast(&flush_cnd);
   pthread_mutex_unlock(&flush_mtx);

   DEBUG_MSG("finish: %u", submit->fence);
}

static void
fd_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   fd_submit_del(submit);
}

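/* Flush all deferred submits, merging them into the most recently deferred
 * submit's submit_list.  Any in-fence-fd's from earlier submits are
 * accumulated into the last one.  Called with dev->submit_lock held.
 */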
static void
flush_deferred_submits(struct fd_device *dev)
{
   MESA_TRACE_FUNC();

   simple_mtx_assert_locked(&dev->submit_lock);

   if (list_is_empty(&dev->deferred_submits))
      return;

   struct fd_submit *submit = last_submit(&dev->deferred_submits);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   list_replace(&dev->deferred_submits, &fd_submit->submit_list);
   list_inithead(&dev->deferred_submits);
   dev->deferred_cmds = 0;

   /* If we have multiple submits with in-fence-fd's then merge them: */
   foreach_submit (submit, &fd_submit->submit_list) {
      struct fd_submit_sp *fd_deferred_submit = to_fd_submit_sp(submit);

      if (fd_deferred_submit == fd_submit)
         break;

      if (fd_deferred_submit->in_fence_fd != -1) {
         sync_accumulate("freedreno",
                         &fd_submit->in_fence_fd,
                         fd_deferred_submit->in_fence_fd);
         close(fd_deferred_submit->in_fence_fd);
         fd_deferred_submit->in_fence_fd = -1;
      }
   }

   fd_fence_del(dev->deferred_submits_fence);
   dev->deferred_submits_fence = NULL;

   struct util_queue_fence *fence = &fd_submit->out_fence->ready;

   DEBUG_MSG("enqueue: %u", submit->fence);

   if (fd_device_threaded_submit(submit->pipe->dev)) {
      util_queue_add_job(&submit->pipe->dev->submit_queue,
                         submit, fence,
                         fd_submit_sp_flush_execute,
                         fd_submit_sp_flush_cleanup,
                         0);
   } else {
      fd_submit_sp_flush_execute(submit, NULL, 0);
      fd_submit_sp_flush_cleanup(submit, NULL, 0);
   }
}

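/* Heuristic for whether it is worth deferring (and later merging) this
 * submit rather than flushing it immediately.
 */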
static bool
should_defer(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   /* if too many bo's, it may not be worth the CPU cost of submit merging: */
   if (fd_submit->nr_bos > 30)
      return false;

   /* On the kernel side, with a 32K ringbuffer, we have an upper limit of 2k
    * cmds before we exceed the size of the ringbuffer, which results in
    * deadlock writing into the RB (ie. the kernel doesn't finish writing into
    * the RB so it doesn't kick the GPU to start consuming from the RB)
    */
   if (submit->pipe->dev->deferred_cmds > 128)
      return false;

   return true;
}

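/* Flush a submit.  The submit is appended to the device's deferred-submit
 * list and, when possible (no out-fence fd required, no implicitly-synced
 * shared buffers, and the deferral heuristic allows it), actual submission
 * to the kernel is deferred so that multiple submits can be merged.
 */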
static struct fd_fence *
fd_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, bool use_fence_fd)
{
   struct fd_device *dev = submit->pipe->dev;
   struct fd_pipe *pipe = submit->pipe;

   MESA_TRACE_FUNC();

   /* Acquire lock before flush_prep() because it is possible to race between
    * this and pipe->flush():
    */
   simple_mtx_lock(&dev->submit_lock);

   /* If there are deferred submits from another fd_pipe, flush them now,
    * since we can't merge submits from different submitqueues (ie. they
    * could have different priority, etc)
    */
   if (!list_is_empty(&dev->deferred_submits) &&
       (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) {
      flush_deferred_submits(dev);
   }

   list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits);

   if (!dev->deferred_submits_fence)
      dev->deferred_submits_fence = fd_fence_new(submit->pipe, use_fence_fd);

   struct fd_fence *out_fence = fd_fence_ref(dev->deferred_submits_fence);

   /* upgrade the out_fence for the deferred submits, if needed: */
   if (use_fence_fd)
      out_fence->use_fence_fd = true;

   bool has_shared = fd_submit_sp_flush_prep(submit, in_fence_fd, out_fence);

   if ((in_fence_fd != -1) || out_fence->use_fence_fd)
      pipe->no_implicit_sync = true;

   /* The rule about skipping submit merging with shared buffers is only
    * needed for implicit-sync.
    */
   if (pipe->no_implicit_sync)
      has_shared = false;

   assert(fd_fence_before(pipe->last_enqueue_fence, submit->fence));
   pipe->last_enqueue_fence = submit->fence;

   /* If we don't need an out-fence, we can defer the submit.
    *
    * TODO we could defer submits with in-fence as well.. if we took our own
    * reference to the fd, and merged all the in-fence-fd's when we flush the
    * deferred submits
    */
   if (!use_fence_fd && !has_shared && should_defer(submit)) {
      DEBUG_MSG("defer: %u", submit->fence);
      dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary);
      assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));
      simple_mtx_unlock(&dev->submit_lock);

      return out_fence;
   }

   flush_deferred_submits(dev);

   simple_mtx_unlock(&dev->submit_lock);

   return out_fence;
}

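/* Ensure that all deferred submits up to the given (userspace) fence have
 * been flushed to the kernel, waiting on the submit queue if necessary.
 */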
void
fd_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence)
{
   struct fd_device *dev = pipe->dev;

   if (!fd_fence_before(pipe->last_submit_fence, fence))
      return;

   MESA_TRACE_FUNC();

   simple_mtx_lock(&dev->submit_lock);

   assert(!fd_fence_after(fence, pipe->last_enqueue_fence));

   flush_deferred_submits(dev);

   simple_mtx_unlock(&dev->submit_lock);

   if (!fd_device_threaded_submit(pipe->dev))
      return;

   /* Once we are sure that we've enqueued at least up to the requested
    * submit, we need to be sure that submitq has caught up and flushed
    * them to the kernel
    */
   pthread_mutex_lock(&flush_mtx);
   while (fd_fence_before(pipe->last_submit_fence, fence)) {
      pthread_cond_wait(&flush_cnd, &flush_mtx);
   }
   pthread_mutex_unlock(&flush_mtx);
}

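/* Free the submit and drop the references it holds on bos, rings, and its
 * out-fence.
 */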
static void
fd_submit_sp_destroy(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   if (fd_submit->suballoc_ring)
      fd_ringbuffer_del(fd_submit->suballoc_ring);

   _mesa_hash_table_destroy(fd_submit->bo_table, NULL);
   _mesa_hash_table_destroy(fd_submit->suballoc_bo_table, NULL);

   // TODO it would be nice to have a way to assert() if all
   // rb's haven't been free'd back to the slab, because that is
   // an indication that we are leaking bo's
   slab_destroy_child(&fd_submit->ring_pool);

   fd_bo_del_array(fd_submit->bos, fd_submit->nr_bos);
   free(fd_submit->bos);

   fd_bo_del_array(fd_submit->suballoc_bos, fd_submit->nr_suballoc_bos);
   free(fd_submit->suballoc_bos);

   if (fd_submit->out_fence)
      fd_fence_del(fd_submit->out_fence);

   free(fd_submit);
}

static const struct fd_submit_funcs submit_funcs = {
   .new_ringbuffer = fd_submit_sp_new_ringbuffer,
   .flush = fd_submit_sp_flush,
   .destroy = fd_submit_sp_destroy,
};

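/* Create a new softpin submit for the given pipe.  flush_submit_list is the
 * backend callback used to hand a merged submit list to the kernel.
 */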
struct fd_submit *
fd_submit_sp_new(struct fd_pipe *pipe, flush_submit_list_fn flush_submit_list)
{
   struct fd_submit_sp *fd_submit = calloc(1, sizeof(*fd_submit));
   struct fd_submit *submit;

   fd_submit->bo_table = _mesa_pointer_hash_table_create(NULL);
   fd_submit->suballoc_bo_table = _mesa_pointer_hash_table_create(NULL);

   slab_create_child(&fd_submit->ring_pool, &pipe->ring_pool);

   fd_submit->flush_submit_list = flush_submit_list;
   fd_submit->seqno = seqno_next(&pipe->submit_seqno);

   submit = &fd_submit->base;
   submit->funcs = &submit_funcs;

   return submit;
}

void
fd_pipe_sp_ringpool_init(struct fd_pipe *pipe)
{
   // TODO tune size:
   slab_create_parent(&pipe->ring_pool, sizeof(struct fd_ringbuffer_sp), 16);
}

void
fd_pipe_sp_ringpool_fini(struct fd_pipe *pipe)
{
   if (pipe->ring_pool.num_elements)
      slab_destroy_parent(&pipe->ring_pool);
}

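/* Close out the current command stream chunk by recording the ring bo and
 * the number of bytes emitted so far into the cmds table.
 */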
static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   APPEND(&fd_ring->u, cmds,
          (struct fd_cmd_sp){
             .ring_bo = fd_bo_ref(fd_ring->ring_bo),
             .size = offset_bytes(ring->cur, ring->start),
          });
}

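/* Grow a GROWABLE ringbuffer: finalize the current chunk, then replace the
 * ring bo with a freshly allocated one of the requested size.
 */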
static void
fd_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_pipe *pipe = fd_ring->u.submit->pipe;

   assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

   fd_bo_del(fd_ring->ring_bo);
   fd_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(fd_ring->ring_bo);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;
   ring->size = size;
}

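/* Linear search of a ringbuffer object's reloc_bos table. */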
static inline bool
fd_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   for (int i = 0; i < fd_ring->u.nr_reloc_bos; i++) {
      if (fd_ring->u.reloc_bos[i] == bo)
         return true;
   }
   return false;
}

static void
fd_ringbuffer_sp_emit_bo_nonobj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(fd_ring->u.submit);

   fd_submit_append_bo(fd_submit, bo);
}

static void
fd_ringbuffer_sp_assert_attached_nonobj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
#ifndef NDEBUG
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(fd_ring->u.submit);
   assert(check_append_bo(fd_submit, bo, true) != ~0);
#endif
}

static void
fd_ringbuffer_sp_emit_bo_obj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   assert(ring->flags & _FD_RINGBUFFER_OBJECT);

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   /* Avoid emitting duplicate BO references into the list.  Ringbuffer
    * objects are long-lived, so this saves ongoing work at draw time in
    * exchange for a bit at context setup/first draw.  And the number of
    * relocs per ringbuffer object is fairly small, so the O(n^2) doesn't
    * hurt much.
    */
   if (!fd_ringbuffer_references_bo(ring, bo)) {
      APPEND(&fd_ring->u, reloc_bos, fd_bo_ref(bo));
   }
}

static void
fd_ringbuffer_sp_assert_attached_obj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
#ifndef NDEBUG
   /* If the stateobj already references the bo, nothing more to do: */
   if (fd_ringbuffer_references_bo(ring, bo))
      return;

   /* If not, we need to defer the assert.. because the batch resource
    * tracking may have attached the bo to the submit that the stateobj
    * will eventually be referenced by:
    */
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   for (int i = 0; i < fd_ring->u.nr_assert_bos; i++)
      if (fd_ring->u.assert_bos[i] == bo)
         return;

   APPEND(&fd_ring->u, assert_bos, fd_bo_ref(bo));
#endif
}

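/* Generate the 64-bit and 32-bit variants of the emit_reloc helpers by
 * including the template header twice with different PTRSZ values.
 */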
#define PTRSZ 64
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ
#define PTRSZ 32
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ

static uint32_t
fd_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_fd_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

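/* Report whether the submit's bo tables still have headroom, so the caller
 * can flush before the arrays approach MAX_ARRAY_SIZE.
 */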
static bool
fd_ringbuffer_sp_check_size(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit *submit = fd_ring->u.submit;

   if (to_fd_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   if (to_fd_submit_sp(submit)->nr_suballoc_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   return true;
}

static void
fd_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   fd_bo_del(fd_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      fd_bo_del_array(fd_ring->u.reloc_bos, fd_ring->u.nr_reloc_bos);
      free(fd_ring->u.reloc_bos);
#ifndef NDEBUG
      fd_bo_del_array(fd_ring->u.assert_bos, fd_ring->u.nr_assert_bos);
      free(fd_ring->u.assert_bos);
#endif
      free(fd_ring);
   } else {
      struct fd_submit *submit = fd_ring->u.submit;

      // TODO re-arrange the data structures so we can use fd_bo_del_array()
      for (unsigned i = 0; i < fd_ring->u.nr_cmds; i++) {
         fd_bo_del(fd_ring->u.cmds[i].ring_bo);
      }
      free(fd_ring->u.cmds);

      slab_free(&to_fd_submit_sp(submit)->ring_pool, fd_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_nonobj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_nonobj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_obj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_obj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_nonobj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_nonobj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_obj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_obj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

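/* Common ringbuffer initialization: map the ring bo, set up the write cursor,
 * and select the 32-bit/64-bit and object/non-object vtable variant.
 */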
static inline struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &fd_ring->base;

   assert(fd_ring->ring_bo);

   uint8_t *base = fd_bo_map(fd_ring->ring_bo);
   ring->start = (void *)(base + fd_ring->offset);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   if (flags & _FD_RINGBUFFER_OBJECT) {
      if (fd_ring->u.pipe->is_64bit) {
         ring->funcs = &ring_funcs_obj_64;
      } else {
         ring->funcs = &ring_funcs_obj_32;
      }
   } else {
      if (fd_ring->u.submit->pipe->is_64bit) {
         ring->funcs = &ring_funcs_nonobj_64;
      } else {
         ring->funcs = &ring_funcs_nonobj_32;
      }
   }

   // TODO initializing these could probably be conditional on flags
   // since unneeded for FD_RINGBUFFER_STAGING case..
   fd_ring->u.cmds = NULL;
   fd_ring->u.nr_cmds = fd_ring->u.max_cmds = 0;

   fd_ring->u.reloc_bos = NULL;
   fd_ring->u.nr_reloc_bos = fd_ring->u.max_reloc_bos = 0;
#ifndef NDEBUG
   fd_ring->u.assert_bos = NULL;
   fd_ring->u.nr_assert_bos = fd_ring->u.max_assert_bos = 0;
#endif

   return ring;
}

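/* Create a long-lived ringbuffer object (stateobj), suballocated from the
 * device's shared suballoc bo.
 */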
struct fd_ringbuffer *
fd_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct fd_device *dev = pipe->dev;
   struct fd_ringbuffer_sp *fd_ring = malloc(sizeof(*fd_ring));

   /* Lock access to the fd_pipe->suballoc_* since ringbuffer object allocation
    * can happen both on the frontend (most CSOs) and the driver thread (a6xx
    * cached tex state, for example)
    */
   simple_mtx_lock(&dev->suballoc_lock);

   fd_ring->offset = align(dev->suballoc_offset, SUBALLOC_ALIGNMENT);
   if (!dev->suballoc_bo ||
       fd_ring->offset + size > fd_bo_size(dev->suballoc_bo)) {
      if (dev->suballoc_bo)
         fd_bo_del(dev->suballoc_bo);
      dev->suballoc_bo =
         fd_bo_new_ring(dev, MAX2(SUBALLOC_SIZE, align(size, 4096)));
      fd_ring->offset = 0;
   }

   fd_ring->u.pipe = pipe;
   fd_ring->ring_bo = fd_bo_ref(dev->suballoc_bo);
   fd_ring->base.refcnt = 1;
   fd_ring->u.last_submit_seqno = 0;

   dev->suballoc_offset = fd_ring->offset + size;

   simple_mtx_unlock(&dev->suballoc_lock);

   return fd_ringbuffer_sp_init(fd_ring, size, _FD_RINGBUFFER_OBJECT);
}