/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <assert.h>
#include <inttypes.h>
#include <pthread.h>

#include "util/hash_table.h"
#include "util/libsync.h"
#include "util/os_file.h"
#include "util/slab.h"

#include "freedreno_ringbuffer_sp.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */

/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on,
 * so we use a condition-variable instead.  Note that pipe->flush() is not
 * expected to be a common/hot path.
 */
static pthread_cond_t flush_cnd = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER;

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags);

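/* Add a suballocated bo to the submit's suballoc_bos table (if not already
 * present) and cache the table index in bo->idx.  With check=true, no new
 * entry is added; false is returned if the bo is not already in the table.
 */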
static inline bool
check_append_suballoc_bo(struct fd_submit_sp *submit, struct fd_bo *bo, bool check)
{
   uint32_t idx = READ_ONCE(bo->idx);

   if (unlikely((idx >= submit->nr_suballoc_bos) ||
                (submit->suballoc_bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(
            submit->suballoc_bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else if (unlikely(check)) {
         return false;
      } else {
         idx = APPEND(submit, suballoc_bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(
               submit->suballoc_bo_table, hash, bo, (void *)(uintptr_t)idx);
      }
      bo->idx = idx;
   }

   return true;
}

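/* Look up (or append) a bo in the submit's bos table and return its index.
 * Suballocated bos are tracked separately and resolved to their backing heap
 * block first.  With check=true, returns ~0 instead of appending a missing bo.
 */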
static inline uint32_t
check_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo, bool check)
{
   if (suballoc_bo(bo)) {
      if (check) {
         if (!check_append_suballoc_bo(submit, bo, true)) {
            return ~0;
         }
         bo = fd_bo_heap_block(bo);
      } else {
         check_append_suballoc_bo(submit, bo, false);
         bo = fd_bo_heap_block(bo);
      }
   }

   /* NOTE: it is legal to use the same bo on different threads for
    * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
   uint32_t idx = READ_ONCE(bo->idx);

   if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else if (unlikely(check)) {
         return ~0;
      } else {
         idx = APPEND(submit, bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
                                            (void *)(uintptr_t)idx);
      }
      bo->idx = idx;
   }

   return idx;
}

/* add (if needed) bo to submit and return index: */
uint32_t
fd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo)
{
   return check_append_bo(submit, bo, false);
}

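/* Set up backing storage for a streaming ringbuffer.  Small streaming rings
 * are suballocated from a shared bo to reduce bo churn; a new SUBALLOC_SIZE
 * bo is created when the current one is exhausted.
 */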
static void
fd_submit_suballoc_ring_bo(struct fd_submit *submit,
                           struct fd_ringbuffer_sp *fd_ring, uint32_t size)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (fd_submit->suballoc_ring) {
      struct fd_ringbuffer_sp *suballoc_ring =
         to_fd_ringbuffer_sp(fd_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset =
         fd_ringbuffer_size(fd_submit->suballoc_ring) + suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, SUBALLOC_ALIGNMENT);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE);
      fd_ring->offset = 0;
   } else {
      fd_ring->ring_bo = fd_bo_ref(suballoc_bo);
      fd_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = fd_submit->suballoc_ring;

   fd_submit->suballoc_ring = fd_ringbuffer_ref(&fd_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}

static struct fd_ringbuffer *
fd_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
                            enum fd_ringbuffer_flags flags)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_ringbuffer_sp *fd_ring;

   fd_ring = slab_alloc(&fd_submit->ring_pool);

   fd_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   fd_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      fd_submit_suballoc_ring_bo(submit, fd_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = SUBALLOC_SIZE;

      fd_ring->offset = 0;
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
   }

   if (!fd_ringbuffer_sp_init(fd_ring, size, flags))
      return NULL;

   return &fd_ring->base;
}

/**
 * Prepare submit for flush, always done synchronously.
 *
 * 1) Finalize the primary ringbuffer; at this point no more cmdstream may
 *    be written into it, since from the PoV of the upper level driver
 *    the submit is flushed, even if deferred
 * 2) Add cmdstream bos to the bos table
 * 3) Update bo fences
 */
static bool
fd_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
                        struct fd_fence *out_fence)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   bool has_shared = false;

   finalize_current_cmd(submit->primary);

   struct fd_ringbuffer_sp *primary =
      to_fd_ringbuffer_sp(submit->primary);

   for (unsigned i = 0; i < primary->u.nr_cmds; i++)
      fd_submit_append_bo(fd_submit, primary->u.cmds[i].ring_bo);

   out_fence->ufence = submit->fence;

   simple_mtx_lock(&fence_lock);
   for (unsigned i = 0; i < fd_submit->nr_bos; i++) {
      fd_bo_add_fence(fd_submit->bos[i], out_fence);
      has_shared |= fd_submit->bos[i]->alloc_flags & FD_BO_SHARED;
   }
   for (unsigned i = 0; i < fd_submit->nr_suballoc_bos; i++) {
      fd_bo_add_fence(fd_submit->suballoc_bos[i], out_fence);
   }
   simple_mtx_unlock(&fence_lock);

   fd_submit->out_fence = fd_fence_ref(out_fence);
   fd_submit->in_fence_fd = (in_fence_fd == -1) ?
         -1 : os_dupfd_cloexec(in_fence_fd);

   return has_shared;
}

static void
fd_submit_sp_flush_execute(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_pipe *pipe = submit->pipe;

   fd_submit->flush_submit_list(&fd_submit->submit_list);

   pthread_mutex_lock(&flush_mtx);
   assert(fd_fence_before(pipe->last_submit_fence, fd_submit->base.fence));
   pipe->last_submit_fence = fd_submit->base.fence;
   pthread_cond_broadcast(&flush_cnd);
   pthread_mutex_unlock(&flush_mtx);

   DEBUG_MSG("finish: %u", submit->fence);
}

static void
fd_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   fd_submit_del(submit);
}

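/* Flush all deferred submits as a single merged submit-list.  The list is
 * handed off to the most recently deferred submit, which is either executed
 * directly or enqueued on the device's submit queue when threaded submit is
 * enabled.
 */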
static void
flush_deferred_submits(struct fd_device *dev)
{
   MESA_TRACE_FUNC();

   simple_mtx_assert_locked(&dev->submit_lock);

   if (list_is_empty(&dev->deferred_submits))
      return;

   struct fd_submit *submit = last_submit(&dev->deferred_submits);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   list_replace(&dev->deferred_submits, &fd_submit->submit_list);
   list_inithead(&dev->deferred_submits);
   dev->deferred_cmds = 0;

   /* If we have multiple submits with in-fence-fd's then merge them: */
   foreach_submit (submit, &fd_submit->submit_list) {
      struct fd_submit_sp *fd_deferred_submit = to_fd_submit_sp(submit);

      if (fd_deferred_submit == fd_submit)
         break;

      if (fd_deferred_submit->in_fence_fd != -1) {
         sync_accumulate("freedreno",
                         &fd_submit->in_fence_fd,
                         fd_deferred_submit->in_fence_fd);
         close(fd_deferred_submit->in_fence_fd);
         fd_deferred_submit->in_fence_fd = -1;
      }
   }

   fd_fence_del(dev->deferred_submits_fence);
   dev->deferred_submits_fence = NULL;

   struct util_queue_fence *fence = &fd_submit->out_fence->ready;

   DEBUG_MSG("enqueue: %u", submit->fence);

   if (fd_device_threaded_submit(submit->pipe->dev)) {
      util_queue_add_job(&submit->pipe->dev->submit_queue,
                         submit, fence,
                         fd_submit_sp_flush_execute,
                         fd_submit_sp_flush_cleanup,
                         0);
   } else {
      fd_submit_sp_flush_execute(submit, NULL, 0);
      fd_submit_sp_flush_cleanup(submit, NULL, 0);
   }
}

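/* Decide whether a submit can be queued on the deferred list to be merged
 * with later submits, rather than flushed immediately.
 */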
static bool
should_defer(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   /* if too many bo's, it may not be worth the CPU cost of submit merging: */
   if (fd_submit->nr_bos > 30)
      return false;

   /* On the kernel side, with a 32K ringbuffer, we have an upper limit of 2k
    * cmds before we exceed the size of the ringbuffer, which results in
    * deadlock writing into the RB (ie. kernel doesn't finish writing into
    * the RB so it doesn't kick the GPU to start consuming from the RB)
    */
   if (submit->pipe->dev->deferred_cmds > 128)
      return false;

   return true;
}

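/* Queue the submit on the device's deferred-submit list, flushing the list
 * immediately if an out-fence fd is needed, shared buffers require implicit
 * sync, or deferring is otherwise not worthwhile.
 */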
static struct fd_fence *
fd_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, bool use_fence_fd)
{
   struct fd_device *dev = submit->pipe->dev;
   struct fd_pipe *pipe = submit->pipe;

   MESA_TRACE_FUNC();

   /* Acquire lock before flush_prep() because it is possible to race between
    * this and pipe->flush():
    */
   simple_mtx_lock(&dev->submit_lock);

   /* If there are deferred submits from another fd_pipe, flush them now,
    * since we can't merge submits from different submitqueues (ie. they
    * could have different priority, etc)
    */
   if (!list_is_empty(&dev->deferred_submits) &&
       (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) {
      flush_deferred_submits(dev);
   }

   list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits);

   if (!dev->deferred_submits_fence)
      dev->deferred_submits_fence = fd_fence_new(submit->pipe, use_fence_fd);

   struct fd_fence *out_fence = fd_fence_ref(dev->deferred_submits_fence);

   /* upgrade the out_fence for the deferred submits, if needed: */
   if (use_fence_fd)
      out_fence->use_fence_fd = true;

   bool has_shared = fd_submit_sp_flush_prep(submit, in_fence_fd, out_fence);

   if ((in_fence_fd != -1) || out_fence->use_fence_fd)
      pipe->no_implicit_sync = true;

   /* The rule about skipping submit merging with shared buffers is only
    * needed for implicit-sync.
    */
   if (pipe->no_implicit_sync)
      has_shared = false;

   assert(fd_fence_before(pipe->last_enqueue_fence, submit->fence));
   pipe->last_enqueue_fence = submit->fence;

   /* If we don't need an out-fence, we can defer the submit.
    *
    * TODO we could defer submits with in-fence as well.. if we took our own
    * reference to the fd, and merged all the in-fence-fd's when we flush the
    * deferred submits
    */
   if (!use_fence_fd && !has_shared && should_defer(submit)) {
      DEBUG_MSG("defer: %u", submit->fence);
      dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary);
      assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));
      simple_mtx_unlock(&dev->submit_lock);

      return out_fence;
   }

   flush_deferred_submits(dev);

   simple_mtx_unlock(&dev->submit_lock);

   return out_fence;
}

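/* Flush any deferred submits needed to reach the requested fence and, with
 * threaded submit, wait until the submit queue has actually pushed them to
 * the kernel.
 */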
void
fd_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence)
{
   struct fd_device *dev = pipe->dev;

   if (!fd_fence_before(pipe->last_submit_fence, fence))
      return;

   MESA_TRACE_FUNC();

   simple_mtx_lock(&dev->submit_lock);

   assert(!fd_fence_after(fence, pipe->last_enqueue_fence));

   flush_deferred_submits(dev);

   simple_mtx_unlock(&dev->submit_lock);

   if (!fd_device_threaded_submit(pipe->dev))
      return;

   /* Once we are sure that we've enqueued at least up to the requested
    * submit, we need to be sure that submitq has caught up and flushed
    * them to the kernel
    */
   pthread_mutex_lock(&flush_mtx);
   while (fd_fence_before(pipe->last_submit_fence, fence)) {
      pthread_cond_wait(&flush_cnd, &flush_mtx);
   }
   pthread_mutex_unlock(&flush_mtx);
}

static void
fd_submit_sp_destroy(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   if (fd_submit->suballoc_ring)
      fd_ringbuffer_del(fd_submit->suballoc_ring);

   _mesa_hash_table_destroy(fd_submit->bo_table, NULL);
   _mesa_hash_table_destroy(fd_submit->suballoc_bo_table, NULL);

   // TODO it would be nice to have a way to assert() if all
   // rb's haven't been free'd back to the slab, because that is
   // an indication that we are leaking bo's
   slab_destroy_child(&fd_submit->ring_pool);

   fd_bo_del_array(fd_submit->bos, fd_submit->nr_bos);
   free(fd_submit->bos);

   fd_bo_del_array(fd_submit->suballoc_bos, fd_submit->nr_suballoc_bos);
   free(fd_submit->suballoc_bos);

   if (fd_submit->out_fence)
      fd_fence_del(fd_submit->out_fence);

   free(fd_submit);
}

static const struct fd_submit_funcs submit_funcs = {
   .new_ringbuffer = fd_submit_sp_new_ringbuffer,
   .flush = fd_submit_sp_flush,
   .destroy = fd_submit_sp_destroy,
};

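/* Create a new softpin submit; the provided callback is used to flush a
 * merged list of deferred submits to the kernel.
 */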
struct fd_submit *
fd_submit_sp_new(struct fd_pipe *pipe, flush_submit_list_fn flush_submit_list)
{
   struct fd_submit_sp *fd_submit = calloc(1, sizeof(*fd_submit));
   struct fd_submit *submit;

   fd_submit->bo_table = _mesa_pointer_hash_table_create(NULL);
   fd_submit->suballoc_bo_table = _mesa_pointer_hash_table_create(NULL);

   slab_create_child(&fd_submit->ring_pool, &pipe->ring_pool);

   fd_submit->flush_submit_list = flush_submit_list;
   fd_submit->seqno = seqno_next(&pipe->submit_seqno);

   submit = &fd_submit->base;
   submit->funcs = &submit_funcs;

   return submit;
}

void
fd_pipe_sp_ringpool_init(struct fd_pipe *pipe)
{
   // TODO tune size:
   slab_create_parent(&pipe->ring_pool, sizeof(struct fd_ringbuffer_sp), 16);
}

void
fd_pipe_sp_ringpool_fini(struct fd_pipe *pipe)
{
   if (pipe->ring_pool.num_elements)
      slab_destroy_parent(&pipe->ring_pool);
}

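/* Record the current command-stream range (ring_bo plus the number of bytes
 * emitted so far) in the cmds table; nothing more may be appended to that
 * range afterwards.
 */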
static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   APPEND(&fd_ring->u, cmds,
          (struct fd_cmd_sp){
             .ring_bo = fd_bo_ref(fd_ring->ring_bo),
             .size = offset_bytes(ring->cur, ring->start),
          });
}

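/* Grow path for growable ringbuffers: record the range written so far and
 * switch to a freshly allocated ring bo of the requested size.
 */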
static void
fd_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_pipe *pipe = fd_ring->u.submit->pipe;

   assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

   fd_bo_del(fd_ring->ring_bo);
   fd_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(fd_ring->ring_bo);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;
   ring->size = size;
}

static inline bool
fd_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   for (int i = 0; i < fd_ring->u.nr_reloc_bos; i++) {
      if (fd_ring->u.reloc_bos[i] == bo)
         return true;
   }
   return false;
}

static void
fd_ringbuffer_sp_emit_bo_nonobj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(fd_ring->u.submit);

   fd_submit_append_bo(fd_submit, bo);
}

static void
fd_ringbuffer_sp_assert_attached_nonobj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
#ifndef NDEBUG
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(fd_ring->u.submit);
   assert(check_append_bo(fd_submit, bo, true) != ~0);
#endif
}

static void
fd_ringbuffer_sp_emit_bo_obj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   assert(ring->flags & _FD_RINGBUFFER_OBJECT);

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   /* Avoid emitting duplicate BO references into the list.  Ringbuffer
    * objects are long-lived, so this saves ongoing work at draw time in
    * exchange for a bit at context setup/first draw.  And the number of
    * relocs per ringbuffer object is fairly small, so the O(n^2) doesn't
    * hurt much.
    */
   if (!fd_ringbuffer_references_bo(ring, bo)) {
      APPEND(&fd_ring->u, reloc_bos, fd_bo_ref(bo));
   }
}

static void
fd_ringbuffer_sp_assert_attached_obj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
#ifndef NDEBUG
   /* If the stateobj already references the bo, nothing more to do: */
   if (fd_ringbuffer_references_bo(ring, bo))
      return;

   /* If not, we need to defer the assert.. because the batch resource
    * tracking may have attached the bo to the submit that the stateobj
    * will eventually be referenced by:
    */
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   for (int i = 0; i < fd_ring->u.nr_assert_bos; i++)
      if (fd_ring->u.assert_bos[i] == bo)
         return;

   APPEND(&fd_ring->u, assert_bos, fd_bo_ref(bo));
#endif
}

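/* The reloc emit helpers are generated in 32-bit and 64-bit variants by
 * including the template header twice with different PTRSZ:
 */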
#define PTRSZ 64
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ
#define PTRSZ 32
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ

static uint32_t
fd_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_fd_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

static bool
fd_ringbuffer_sp_check_size(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit *submit = fd_ring->u.submit;

   if (to_fd_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   if (to_fd_submit_sp(submit)->nr_suballoc_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   return true;
}

static void
fd_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   fd_bo_del(fd_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      fd_bo_del_array(fd_ring->u.reloc_bos, fd_ring->u.nr_reloc_bos);
      free(fd_ring->u.reloc_bos);
#ifndef NDEBUG
      fd_bo_del_array(fd_ring->u.assert_bos, fd_ring->u.nr_assert_bos);
      free(fd_ring->u.assert_bos);
#endif
      free(fd_ring);
   } else {
      struct fd_submit *submit = fd_ring->u.submit;

      // TODO re-arrange the data structures so we can use fd_bo_del_array()
      for (unsigned i = 0; i < fd_ring->u.nr_cmds; i++) {
         fd_bo_del(fd_ring->u.cmds[i].ring_bo);
      }
      free(fd_ring->u.cmds);

      slab_free(&to_fd_submit_sp(submit)->ring_pool, fd_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_nonobj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_nonobj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_obj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_obj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_nonobj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_nonobj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_obj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_obj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

static inline struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &fd_ring->base;

   assert(fd_ring->ring_bo);

   uint8_t *base = fd_bo_map(fd_ring->ring_bo);
   ring->start = (void *)(base + fd_ring->offset);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   if (flags & _FD_RINGBUFFER_OBJECT) {
      if (fd_ring->u.pipe->is_64bit) {
         ring->funcs = &ring_funcs_obj_64;
      } else {
         ring->funcs = &ring_funcs_obj_32;
      }
   } else {
      if (fd_ring->u.submit->pipe->is_64bit) {
         ring->funcs = &ring_funcs_nonobj_64;
      } else {
         ring->funcs = &ring_funcs_nonobj_32;
      }
   }

   // TODO initializing these could probably be conditional on flags
   // since unneeded for FD_RINGBUFFER_STAGING case..
   fd_ring->u.cmds = NULL;
   fd_ring->u.nr_cmds = fd_ring->u.max_cmds = 0;

   fd_ring->u.reloc_bos = NULL;
   fd_ring->u.nr_reloc_bos = fd_ring->u.max_reloc_bos = 0;
#ifndef NDEBUG
   fd_ring->u.assert_bos = NULL;
   fd_ring->u.nr_assert_bos = fd_ring->u.max_assert_bos = 0;
#endif

   return ring;
}

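/* Create a long-lived ringbuffer object (stateobj), suballocated from the
 * device-wide suballoc bo.
 */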
struct fd_ringbuffer *
fd_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct fd_device *dev = pipe->dev;
   struct fd_ringbuffer_sp *fd_ring = malloc(sizeof(*fd_ring));

   /* Lock access to the fd_device->suballoc_* since ringbuffer object
    * allocation can happen both on the frontend (most CSOs) and the driver
    * thread (a6xx cached tex state, for example)
    */
   simple_mtx_lock(&dev->suballoc_lock);

   fd_ring->offset = align(dev->suballoc_offset, SUBALLOC_ALIGNMENT);
   if (!dev->suballoc_bo ||
       fd_ring->offset + size > fd_bo_size(dev->suballoc_bo)) {
      if (dev->suballoc_bo)
         fd_bo_del(dev->suballoc_bo);
      dev->suballoc_bo =
         fd_bo_new_ring(dev, MAX2(SUBALLOC_SIZE, align(size, 4096)));
      fd_ring->offset = 0;
   }

   fd_ring->u.pipe = pipe;
   fd_ring->ring_bo = fd_bo_ref(dev->suballoc_bo);
   fd_ring->base.refcnt = 1;
   fd_ring->u.last_submit_seqno = 0;

   dev->suballoc_offset = fd_ring->offset + size;

   simple_mtx_unlock(&dev->suballoc_lock);

   return fd_ringbuffer_sp_init(fd_ring, size, _FD_RINGBUFFER_OBJECT);
}