/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <assert.h>
#include <inttypes.h>
#include <pthread.h>

#include "util/hash_table.h"
#include "util/os_file.h"
#include "util/slab.h"

#include "freedreno_ringbuffer_sp.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */
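
/* Roughly speaking, "softpin" means each bo's GPU virtual address stays
 * fixed for the lifetime of the bo, so the cmdstream can embed iovas
 * directly instead of emitting reloc entries for the kernel to patch.
 * The submit therefore only needs to track which buffers are referenced
 * (for residency and fencing), not where they are referenced.
 */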

#define INIT_SIZE 0x1000

#define SUBALLOC_SIZE (32 * 1024)

/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on,
 * so we use a condition-variable instead.  Note that pipe->flush() is not
 * expected to be a common/hot path.
 */
static pthread_cond_t flush_cnd = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER;

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags);

/* Add the bo to the submit's bo table (if not already present) and return its index: */
uint32_t
fd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo)
{
   uint32_t idx;

   /* NOTE: it is legal to use the same bo on different threads for
    * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
   idx = READ_ONCE(bo->idx);

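   /* bo->idx is updated without a lock, so the value read above may be stale
    * or belong to a different submit.  That is fine: the bounds and identity
    * check below rejects a bogus index and falls back to the hash table.
    */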
   if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else {
         idx = APPEND(submit, bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
                                            (void *)(uintptr_t)idx);
      }
      bo->idx = idx;
   }

   return idx;
}

static void
fd_submit_suballoc_ring_bo(struct fd_submit *submit,
                           struct fd_ringbuffer_sp *fd_ring, uint32_t size)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (fd_submit->suballoc_ring) {
      struct fd_ringbuffer_sp *suballoc_ring =
         to_fd_ringbuffer_sp(fd_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset =
         fd_ringbuffer_size(fd_submit->suballoc_ring) + suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, 0x10);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE);
      fd_ring->offset = 0;
   } else {
      fd_ring->ring_bo = fd_bo_ref(suballoc_bo);
      fd_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = fd_submit->suballoc_ring;

   fd_submit->suballoc_ring = fd_ringbuffer_ref(&fd_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}

static struct fd_ringbuffer *
fd_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
                            enum fd_ringbuffer_flags flags)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_ringbuffer_sp *fd_ring;

   fd_ring = slab_alloc(&fd_submit->ring_pool);

   fd_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   fd_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      fd_submit_suballoc_ring_bo(submit, fd_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = INIT_SIZE;

      fd_ring->offset = 0;
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
   }

   if (!fd_ringbuffer_sp_init(fd_ring, size, flags))
      return NULL;

   return &fd_ring->base;
}

/**
 * Prepare submit for flush, always done synchronously.
 *
 * 1) Finalize primary ringbuffer; at this point no more cmdstream may
 *    be written into it, since from the PoV of the upper-level driver
 *    the submit is flushed, even if deferred
 * 2) Add cmdstream bos to bos table
 * 3) Update bo fences
 */
static bool
fd_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
                        struct fd_submit_fence *out_fence)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   bool has_shared = false;

   finalize_current_cmd(submit->primary);

   struct fd_ringbuffer_sp *primary =
      to_fd_ringbuffer_sp(submit->primary);

   for (unsigned i = 0; i < primary->u.nr_cmds; i++)
      fd_submit_append_bo(fd_submit, primary->u.cmds[i].ring_bo);

   simple_mtx_lock(&table_lock);
   for (unsigned i = 0; i < fd_submit->nr_bos; i++) {
      fd_bo_add_fence(fd_submit->bos[i], submit->pipe, submit->fence);
      has_shared |= fd_submit->bos[i]->shared;
   }
   simple_mtx_unlock(&table_lock);

   fd_submit->out_fence = out_fence;
   fd_submit->in_fence_fd = (in_fence_fd == -1) ?
      -1 : os_dupfd_cloexec(in_fence_fd);

   return has_shared;
}

static void
fd_submit_sp_flush_execute(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_pipe *pipe = submit->pipe;

   fd_submit->flush_submit_list(&fd_submit->submit_list);

   pthread_mutex_lock(&flush_mtx);
   assert(fd_fence_before(pipe->last_submit_fence, fd_submit->base.fence));
   pipe->last_submit_fence = fd_submit->base.fence;
   pthread_cond_broadcast(&flush_cnd);
   pthread_mutex_unlock(&flush_mtx);

   DEBUG_MSG("finish: %u", submit->fence);
}

static void
fd_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   fd_submit_del(submit);
}

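/* Hand a list of deferred submits to the device's submit_queue.  Ownership
 * of the list moves to the last submit in the list; the queue fence we hand
 * to util_queue_add_job() is the caller's out_fence if one was requested,
 * otherwise an internal fence embedded in the submit.
 */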
static int
enqueue_submit_list(struct list_head *submit_list)
{
   struct fd_submit *submit = last_submit(submit_list);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   list_replace(submit_list, &fd_submit->submit_list);
   list_inithead(submit_list);

   struct util_queue_fence *fence;
   if (fd_submit->out_fence) {
      fence = &fd_submit->out_fence->ready;
   } else {
      util_queue_fence_init(&fd_submit->fence);
      fence = &fd_submit->fence;
   }

   DEBUG_MSG("enqueue: %u", submit->fence);

   util_queue_add_job(&submit->pipe->dev->submit_queue,
                      submit, fence,
                      fd_submit_sp_flush_execute,
                      fd_submit_sp_flush_cleanup,
                      0);

   return 0;
}

static bool
should_defer(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   /* if too many bo's, it may not be worth the CPU cost of submit merging: */
   if (fd_submit->nr_bos > 30)
      return false;

   /* On the kernel side, with 32K ringbuffer, we have an upper limit of 2k
    * cmds before we exceed the size of the ringbuffer, which results in
    * deadlock writing into the RB (ie. kernel doesn't finish writing into
    * the RB so it doesn't kick the GPU to start consuming from the RB)
    */
   if (submit->pipe->dev->deferred_cmds > 128)
      return false;

   return true;
}

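/* Frontend flush path: prep the submit synchronously, append it to the
 * device's deferred_submits list, and then either leave it deferred (so
 * that multiple submits can later be merged into a single kernel submit)
 * or, if an in/out fence, a shared bo, or the should_defer() heuristic
 * requires it, hand the entire accumulated list to the submit_queue thread.
 */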
static int
fd_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
                   struct fd_submit_fence *out_fence)
{
   struct fd_device *dev = submit->pipe->dev;
   struct fd_pipe *pipe = submit->pipe;

   /* Acquire lock before flush_prep() because it is possible to race between
    * this and pipe->flush():
    */
   simple_mtx_lock(&dev->submit_lock);

   /* If there are deferred submits from another fd_pipe, flush them now,
    * since we can't merge submits from different submitqueues (ie. they
    * could have different priority, etc)
    */
   if (!list_is_empty(&dev->deferred_submits) &&
       (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) {
      struct list_head submit_list;

      list_replace(&dev->deferred_submits, &submit_list);
      list_inithead(&dev->deferred_submits);
      dev->deferred_cmds = 0;

      enqueue_submit_list(&submit_list);
   }

   list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits);

   bool has_shared = fd_submit_sp_flush_prep(submit, in_fence_fd, out_fence);

   assert(fd_fence_before(pipe->last_enqueue_fence, submit->fence));
   pipe->last_enqueue_fence = submit->fence;

   /* If we don't need an out-fence, we can defer the submit.
    *
    * TODO we could defer submits with in-fence as well.. if we took our own
    * reference to the fd, and merged all the in-fence-fd's when we flush the
    * deferred submits
    */
   if ((in_fence_fd == -1) && !out_fence && !has_shared && should_defer(submit)) {
      DEBUG_MSG("defer: %u", submit->fence);
      dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary);
      assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));
      simple_mtx_unlock(&dev->submit_lock);

      return 0;
   }

   struct list_head submit_list;

   list_replace(&dev->deferred_submits, &submit_list);
   list_inithead(&dev->deferred_submits);
   dev->deferred_cmds = 0;

   simple_mtx_unlock(&dev->submit_lock);

   return enqueue_submit_list(&submit_list);
}

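/* Flush deferred submits on this pipe up to and including the one that will
 * signal 'fence', then block until the submit_queue has actually handed them
 * off to the kernel (tracked via pipe->last_submit_fence).
 */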
void
fd_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence)
{
   struct fd_device *dev = pipe->dev;
   struct list_head submit_list;

   DEBUG_MSG("flush: %u", fence);

   list_inithead(&submit_list);

   simple_mtx_lock(&dev->submit_lock);

   assert(!fd_fence_after(fence, pipe->last_enqueue_fence));

   foreach_submit_safe (deferred_submit, &dev->deferred_submits) {
      /* We should never have submits from multiple pipes in the deferred
       * list.  If we did, we couldn't compare their fence to our fence,
       * since each fd_pipe is an independent timeline.
       */
      if (deferred_submit->pipe != pipe)
         break;

      if (fd_fence_after(deferred_submit->fence, fence))
         break;

      list_del(&deferred_submit->node);
      list_addtail(&deferred_submit->node, &submit_list);
      dev->deferred_cmds -= fd_ringbuffer_cmd_count(deferred_submit->primary);
   }

   assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));

   simple_mtx_unlock(&dev->submit_lock);

   if (list_is_empty(&submit_list))
      goto flush_sync;

   enqueue_submit_list(&submit_list);

flush_sync:
   /* Once we are sure that we've enqueued at least up to the requested
    * submit, we need to be sure that the submit queue has caught up and
    * flushed them to the kernel
    */
   pthread_mutex_lock(&flush_mtx);
   while (fd_fence_before(pipe->last_submit_fence, fence)) {
      pthread_cond_wait(&flush_cnd, &flush_mtx);
   }
   pthread_mutex_unlock(&flush_mtx);
}

static void
fd_submit_sp_destroy(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   if (fd_submit->suballoc_ring)
      fd_ringbuffer_del(fd_submit->suballoc_ring);

   _mesa_hash_table_destroy(fd_submit->bo_table, NULL);

   // TODO it would be nice to have a way to assert() if all
   // rb's haven't been freed back to the slab, because that is
   // an indication that we are leaking bo's
   slab_destroy_child(&fd_submit->ring_pool);

   for (unsigned i = 0; i < fd_submit->nr_bos; i++)
      fd_bo_del(fd_submit->bos[i]);

   free(fd_submit->bos);
   free(fd_submit);
}

static const struct fd_submit_funcs submit_funcs = {
   .new_ringbuffer = fd_submit_sp_new_ringbuffer,
   .flush = fd_submit_sp_flush,
   .destroy = fd_submit_sp_destroy,
};

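/* Illustrative usage only (the real callers are the higher-level freedreno
 * gallium and turnip drivers, and exact entrypoint signatures may differ
 * between mesa versions):
 *
 *    struct fd_submit *submit = fd_submit_new(pipe);
 *    struct fd_ringbuffer *ring =
 *       fd_submit_new_ringbuffer(submit, 0x1000, FD_RINGBUFFER_GROWABLE);
 *    ... emit cmdstream into ring ...
 *    fd_submit_flush(submit, -1, NULL);   // no in-fence fd, no out-fence
 */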
struct fd_submit *
fd_submit_sp_new(struct fd_pipe *pipe, flush_submit_list_fn flush_submit_list)
{
   struct fd_submit_sp *fd_submit = calloc(1, sizeof(*fd_submit));
   struct fd_submit *submit;

   fd_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                 _mesa_key_pointer_equal);

   slab_create_child(&fd_submit->ring_pool, &pipe->ring_pool);

   fd_submit->flush_submit_list = flush_submit_list;

   submit = &fd_submit->base;
   submit->funcs = &submit_funcs;

   return submit;
}

void
fd_pipe_sp_ringpool_init(struct fd_pipe *pipe)
{
   // TODO tune size:
   slab_create_parent(&pipe->ring_pool, sizeof(struct fd_ringbuffer_sp), 16);
}

void
fd_pipe_sp_ringpool_fini(struct fd_pipe *pipe)
{
   if (pipe->ring_pool.num_elements)
      slab_destroy_parent(&pipe->ring_pool);
}

static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   APPEND(&fd_ring->u, cmds,
          (struct fd_cmd_sp){
             .ring_bo = fd_bo_ref(fd_ring->ring_bo),
             .size = offset_bytes(ring->cur, ring->start),
          });
}

static void
fd_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_pipe *pipe = fd_ring->u.submit->pipe;

   assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

   fd_bo_del(fd_ring->ring_bo);
   fd_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(fd_ring->ring_bo);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;
   ring->size = size;
}

static inline bool
fd_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   for (int i = 0; i < fd_ring->u.nr_reloc_bos; i++) {
      if (fd_ring->u.reloc_bos[i] == bo)
         return true;
   }
   return false;
}

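/* Generate 32b and 64b variants of the reloc emission helpers by including
 * the template header twice with different pointer sizes:
 */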
#define PTRSZ 64
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ
#define PTRSZ 32
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ

static uint32_t
fd_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_fd_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

static bool
fd_ringbuffer_sp_check_size(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit *submit = fd_ring->u.submit;

   if (to_fd_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   return true;
}

static void
fd_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   fd_bo_del(fd_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < fd_ring->u.nr_reloc_bos; i++) {
         fd_bo_del(fd_ring->u.reloc_bos[i]);
      }
      free(fd_ring->u.reloc_bos);

      free(fd_ring);
   } else {
      struct fd_submit *submit = fd_ring->u.submit;

      for (unsigned i = 0; i < fd_ring->u.nr_cmds; i++) {
         fd_bo_del(fd_ring->u.cmds[i].ring_bo);
      }
      free(fd_ring->u.cmds);

      slab_free(&to_fd_submit_sp(submit)->ring_pool, fd_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

static inline struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &fd_ring->base;

   assert(fd_ring->ring_bo);

   uint8_t *base = fd_bo_map(fd_ring->ring_bo);
   ring->start = (void *)(base + fd_ring->offset);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   if (flags & _FD_RINGBUFFER_OBJECT) {
      if (fd_dev_64b(&fd_ring->u.pipe->dev_id)) {
         ring->funcs = &ring_funcs_obj_64;
      } else {
         ring->funcs = &ring_funcs_obj_32;
      }
   } else {
      if (fd_dev_64b(&fd_ring->u.submit->pipe->dev_id)) {
         ring->funcs = &ring_funcs_nonobj_64;
      } else {
         ring->funcs = &ring_funcs_nonobj_32;
      }
   }

   // TODO initializing these could probably be conditional on flags
   // since they are unneeded for the FD_RINGBUFFER_STAGING case..
   fd_ring->u.cmds = NULL;
   fd_ring->u.nr_cmds = fd_ring->u.max_cmds = 0;

   fd_ring->u.reloc_bos = NULL;
   fd_ring->u.nr_reloc_bos = fd_ring->u.max_reloc_bos = 0;

   return ring;
}

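/* Ringbuffer objects (stateobjs) are long-lived and not tied to a particular
 * submit, so they are suballocated from a per-device bo rather than from a
 * submit's suballoc ring.  See the suballoc_lock comment below regarding
 * thread safety.
 */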
struct fd_ringbuffer *
fd_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct fd_device *dev = pipe->dev;
   struct fd_ringbuffer_sp *fd_ring = malloc(sizeof(*fd_ring));

   /* Lock access to the dev->suballoc_* fields since ringbuffer object
    * allocation can happen both on the frontend (most CSOs) and the driver
    * thread (a6xx cached tex state, for example)
    */
   simple_mtx_lock(&dev->suballoc_lock);

   /* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords */
   fd_ring->offset = align(dev->suballoc_offset, 64);
   if (!dev->suballoc_bo ||
       fd_ring->offset + size > fd_bo_size(dev->suballoc_bo)) {
      if (dev->suballoc_bo)
         fd_bo_del(dev->suballoc_bo);
      dev->suballoc_bo =
         fd_bo_new_ring(dev, MAX2(SUBALLOC_SIZE, align(size, 4096)));
      fd_ring->offset = 0;
   }

   fd_ring->u.pipe = pipe;
   fd_ring->ring_bo = fd_bo_ref(dev->suballoc_bo);
   fd_ring->base.refcnt = 1;

   dev->suballoc_offset = fd_ring->offset + size;

   simple_mtx_unlock(&dev->suballoc_lock);

   return fd_ringbuffer_sp_init(fd_ring, size, _FD_RINGBUFFER_OBJECT);
}