/*
 * Copyright © 2019 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "tu_cs.h"

#include "tu_suballoc.h"

/**
 * Initialize a command stream.
 */
void
tu_cs_init(struct tu_cs *cs,
           struct tu_device *device,
           enum tu_cs_mode mode,
           uint32_t initial_size, const char *name)
{
   assert(mode != TU_CS_MODE_EXTERNAL);

   memset(cs, 0, sizeof(*cs));

   cs->device = device;
   cs->mode = mode;
   cs->next_bo_size = initial_size;
   cs->name = name;
}
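
/* A hedged usage sketch (illustrative, not part of the driver): a typical
 * TU_CS_MODE_GROW lifetime, assuming a valid tu_device is in scope:
 *
 *    struct tu_cs cs;
 *    tu_cs_init(&cs, device, TU_CS_MODE_GROW, 4096, "example cs");
 *    tu_cs_begin(&cs);
 *    ... tu_cs_reserve_space() + packet emission ...
 *    tu_cs_end(&cs);
 *    ... submit cs.entries ...
 *    tu_cs_finish(&cs);
 */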

/**
 * Initialize a command stream as a wrapper to an external buffer.
 */
void
tu_cs_init_external(struct tu_cs *cs, struct tu_device *device,
                    uint32_t *start, uint32_t *end, uint64_t iova,
                    bool writeable)
{
   memset(cs, 0, sizeof(*cs));

   cs->device = device;
   cs->mode = TU_CS_MODE_EXTERNAL;
   cs->start = cs->reserved_end = cs->cur = start;
   cs->end = end;
   cs->external_iova = iova;
   cs->writeable = writeable;
}
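
/* Hedged sketch: wrapping caller-owned memory, assuming "map", "iova" and a
 * size in dwords are already known. Emission must fit in [start, end);
 * tu_cs_reserve_space() cannot grow an external cs:
 *
 *    struct tu_cs ext;
 *    tu_cs_init_external(&ext, device, map, map + size_dw, iova, false);
 */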

/**
 * Initialize a sub-command stream as a wrapper to an externally sub-allocated
 * buffer.
 */
void
tu_cs_init_suballoc(struct tu_cs *cs, struct tu_device *device,
                    struct tu_suballoc_bo *suballoc_bo)
{
   uint32_t *start = (uint32_t *) tu_suballoc_bo_map(suballoc_bo);
   uint32_t *end = start + (suballoc_bo->size >> 2);

   memset(cs, 0, sizeof(*cs));
   cs->device = device;
   cs->mode = TU_CS_MODE_SUB_STREAM;
   cs->start = cs->reserved_end = cs->cur = start;
   cs->end = end;
   cs->refcount_bo = tu_bo_get_ref(suballoc_bo->bo);
}
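
/* Note: tu_cs_init_suballoc() pins the backing BO via tu_bo_get_ref() above,
 * so the suballocation stays valid until tu_cs_finish() drops the reference.
 */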

/**
 * Finish and release all resources owned by a command stream.
 */
void
tu_cs_finish(struct tu_cs *cs)
{
   for (uint32_t i = 0; i < cs->read_only.bo_count; ++i) {
      tu_bo_finish(cs->device, cs->read_only.bos[i]);
   }

   for (uint32_t i = 0; i < cs->read_write.bo_count; ++i) {
      tu_bo_finish(cs->device, cs->read_write.bos[i]);
   }

   if (cs->refcount_bo)
      tu_bo_finish(cs->device, cs->refcount_bo);

   free(cs->entries);
   free(cs->read_only.bos);
   free(cs->read_write.bos);
}

static struct tu_bo *
tu_cs_current_bo(const struct tu_cs *cs)
{
   if (cs->refcount_bo) {
      return cs->refcount_bo;
   } else {
      const struct tu_bo_array *bos = cs->writeable ? &cs->read_write : &cs->read_only;
      assert(bos->bo_count);
      return bos->bos[bos->bo_count - 1];
   }
}

/**
 * Get the offset of the command packets emitted since the last call to
 * tu_cs_add_entry.
 */
static uint32_t
tu_cs_get_offset(const struct tu_cs *cs)
{
   return cs->start - (uint32_t *) tu_cs_current_bo(cs)->map;
}

/* Get the iova for the next dword to be emitted. Useful after
 * tu_cs_reserve_space() to create a patch point that can be overwritten on
 * the GPU.
 */
uint64_t
tu_cs_get_cur_iova(const struct tu_cs *cs)
{
   if (cs->mode == TU_CS_MODE_EXTERNAL)
      return cs->external_iova + ((char *) cs->cur - (char *) cs->start);
   return tu_cs_current_bo(cs)->iova + ((char *) cs->cur - (char *) tu_cs_current_bo(cs)->map);
}
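
/* Hedged patch-point sketch (illustrative, not taken from this file): record
 * the iova of a placeholder dword so it can be rewritten on the GPU later:
 *
 *    tu_cs_reserve_space(cs, 1);
 *    uint64_t patch_iova = tu_cs_get_cur_iova(cs);
 *    tu_cs_emit(cs, 0);   placeholder, overwritten on the GPU
 */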

/*
 * Allocate and add a BO to a command stream. Following command packets will
 * be emitted to the new BO.
 */
static VkResult
tu_cs_add_bo(struct tu_cs *cs, uint32_t size)
{
   /* no BO for TU_CS_MODE_EXTERNAL */
   assert(cs->mode != TU_CS_MODE_EXTERNAL);
   /* No adding more BOs if suballocating from a suballoc_bo. */
   assert(!cs->refcount_bo);

   /* no dangling command packet */
   assert(tu_cs_is_empty(cs));

   struct tu_bo_array *bos = cs->writeable ? &cs->read_write : &cs->read_only;

   /* grow cs->bos if needed */
   if (bos->bo_count == bos->bo_capacity) {
      uint32_t new_capacity = MAX2(4, 2 * bos->bo_capacity);
      struct tu_bo **new_bos = (struct tu_bo **)
         realloc(bos->bos, new_capacity * sizeof(struct tu_bo *));
      if (!new_bos)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      bos->bo_capacity = new_capacity;
      bos->bos = new_bos;
   }

   struct tu_bo *new_bo;

   VkResult result =
      tu_bo_init_new(cs->device, &new_bo, size * sizeof(uint32_t),
                     (enum tu_bo_alloc_flags)(COND(!cs->writeable,
                                                   TU_BO_ALLOC_GPU_READ_ONLY) |
                                              TU_BO_ALLOC_ALLOW_DUMP),
                     cs->name);
   if (result != VK_SUCCESS) {
      return result;
   }

   result = tu_bo_map(cs->device, new_bo);
   if (result != VK_SUCCESS) {
      tu_bo_finish(cs->device, new_bo);
      return result;
   }

   bos->bos[bos->bo_count++] = new_bo;

   cs->start = cs->cur = cs->reserved_end = (uint32_t *) new_bo->map;
   cs->end = cs->start + new_bo->size / sizeof(uint32_t);

   return VK_SUCCESS;
}

/**
 * Reserve an IB entry.
 */
static VkResult
tu_cs_reserve_entry(struct tu_cs *cs)
{
   /* entries are only for TU_CS_MODE_GROW */
   assert(cs->mode == TU_CS_MODE_GROW);

   /* grow cs->entries if needed */
   if (cs->entry_count == cs->entry_capacity) {
      uint32_t new_capacity = MAX2(4, cs->entry_capacity * 2);
      struct tu_cs_entry *new_entries = (struct tu_cs_entry *)
         realloc(cs->entries, new_capacity * sizeof(struct tu_cs_entry));
      if (!new_entries)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      cs->entry_capacity = new_capacity;
      cs->entries = new_entries;
   }

   return VK_SUCCESS;
}

/**
 * Add an IB entry for the command packets emitted since the last call to this
 * function.
 */
static void
tu_cs_add_entry(struct tu_cs *cs)
{
   /* entries are only for TU_CS_MODE_GROW */
   assert(cs->mode == TU_CS_MODE_GROW);

   /* disallow empty entry */
   assert(!tu_cs_is_empty(cs));

   /*
    * because we disallow empty entries, tu_cs_add_bo and tu_cs_reserve_entry
    * must both have been called
    */
   assert(cs->writeable ? cs->read_write.bo_count : cs->read_only.bo_count);
   assert(cs->entry_count < cs->entry_capacity);

   /* add an entry for [cs->start, cs->cur] */
   cs->entries[cs->entry_count++] = (struct tu_cs_entry) {
      .bo = tu_cs_current_bo(cs),
      .size = tu_cs_get_size(cs) * sizeof(uint32_t),
      .offset = tu_cs_get_offset(cs) * sizeof(uint32_t),
   };

   cs->start = cs->cur;
}

/**
 * Same behavior as tu_cs_emit_call but without emitting the indirect buffer
 * call.
 */
VkResult
tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target)
{
   VkResult result;

   assert(cs->mode == TU_CS_MODE_GROW);
   assert(target->mode == TU_CS_MODE_GROW);

   if (!tu_cs_is_empty(cs))
      tu_cs_add_entry(cs);

   for (unsigned i = 0; i < target->entry_count; i++) {
      result = tu_cs_reserve_entry(cs);
      if (result != VK_SUCCESS)
         return result;
      cs->entries[cs->entry_count++] = target->entries[i];
   }

   return VK_SUCCESS;
}

/**
 * Begin (or continue) command packet emission. This currently does nothing
 * but sanity checks. \a cs must not be in TU_CS_MODE_SUB_STREAM mode.
 */
void
tu_cs_begin(struct tu_cs *cs)
{
   assert(cs->mode != TU_CS_MODE_SUB_STREAM);
   assert(tu_cs_is_empty(cs));
}

/**
 * End command packet emission. This adds an IB entry when \a cs is in
 * TU_CS_MODE_GROW mode.
 */
void
tu_cs_end(struct tu_cs *cs)
{
   assert(cs->mode != TU_CS_MODE_SUB_STREAM);

   if (cs->mode == TU_CS_MODE_GROW && !tu_cs_is_empty(cs))
      tu_cs_add_entry(cs);
}

void
tu_cs_set_writeable(struct tu_cs *cs, bool writeable)
{
   assert(cs->mode == TU_CS_MODE_GROW || cs->mode == TU_CS_MODE_SUB_STREAM);

   if (cs->writeable != writeable) {
      if (cs->mode == TU_CS_MODE_GROW && !tu_cs_is_empty(cs))
         tu_cs_add_entry(cs);
      struct tu_bo_array *old_bos = cs->writeable ? &cs->read_write : &cs->read_only;
      struct tu_bo_array *new_bos = writeable ? &cs->read_write : &cs->read_only;

      old_bos->start = cs->start;
      cs->start = cs->cur = cs->reserved_end = new_bos->start;
      if (new_bos->bo_count) {
         struct tu_bo *bo = new_bos->bos[new_bos->bo_count - 1];
         cs->end = (uint32_t *) bo->map + bo->size / sizeof(uint32_t);
      } else {
         cs->end = NULL;
      }

      cs->writeable = writeable;
   }
}

/**
 * Begin command packet emission to a sub-stream. \a cs must be in
 * TU_CS_MODE_SUB_STREAM mode.
 *
 * Return \a sub_cs which is in TU_CS_MODE_EXTERNAL mode. tu_cs_begin and
 * tu_cs_reserve_space are implied and \a sub_cs is ready for command packet
 * emission.
 */
VkResult
tu_cs_begin_sub_stream(struct tu_cs *cs, uint32_t size, struct tu_cs *sub_cs)
{
   assert(cs->mode == TU_CS_MODE_SUB_STREAM);
   assert(size);

   VkResult result = tu_cs_reserve_space(cs, size);
   if (result != VK_SUCCESS)
      return result;

   tu_cs_init_external(sub_cs, cs->device, cs->cur, cs->reserved_end,
                       tu_cs_get_cur_iova(cs), cs->writeable);
   tu_cs_begin(sub_cs);
   result = tu_cs_reserve_space(sub_cs, size);
   assert(result == VK_SUCCESS);

   return VK_SUCCESS;
}
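
/* Hedged sketch of the sub-stream flow, pairing tu_cs_begin_sub_stream()
 * with tu_cs_end_sub_stream() below:
 *
 *    struct tu_cs sub;
 *    VkResult result = tu_cs_begin_sub_stream(cs, 8, &sub);
 *    ... on success, emit at most 8 dwords into &sub ...
 *    struct tu_cs_entry entry = tu_cs_end_sub_stream(cs, &sub);
 */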

/**
 * Allocate count * size dwords, aligned to size dwords. \a cs must be in
 * TU_CS_MODE_SUB_STREAM mode.
 */
VkResult
tu_cs_alloc(struct tu_cs *cs,
            uint32_t count,
            uint32_t size,
            struct tu_cs_memory *memory)
{
   assert(cs->mode == TU_CS_MODE_SUB_STREAM);
   assert(size && size <= 1024);

   if (!count) {
      /* If you allocated no memory, you'd better not use the iova for
       * anything (but it's left aligned for sanity).
       */
      memory->map = NULL;
      memory->iova = 0xdead0000;
      return VK_SUCCESS;
   }

   /* TODO: smarter way to deal with alignment? */

   VkResult result = tu_cs_reserve_space(cs, count * size + (size - 1));
   if (result != VK_SUCCESS)
      return result;

   struct tu_bo *bo = tu_cs_current_bo(cs);
   size_t offset = align(tu_cs_get_offset(cs), size);

   memory->map = (uint32_t *) bo->map + offset;
   memory->iova = bo->iova + offset * sizeof(uint32_t);
   memory->writeable = cs->writeable;

   cs->start = cs->cur = (uint32_t *) bo->map + offset + count * size;

   return VK_SUCCESS;
}
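
/* Worked example of the reservation above: for count = 3 and size = 4,
 * tu_cs_alloc() reserves 3 * 4 + 3 = 15 dwords, enough to fit a 12-dword
 * region starting at the next 4-dword boundary wherever the cursor lands. */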

/**
 * End command packet emission to a sub-stream. \a sub_cs becomes invalid
 * after this call.
 *
 * Return an IB entry for the sub-stream. The entry has the same lifetime as
 * \a cs.
 */
struct tu_cs_entry
tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs)
{
   assert(cs->mode == TU_CS_MODE_SUB_STREAM);
   assert(sub_cs->start == cs->cur && sub_cs->end == cs->reserved_end);
   tu_cs_sanity_check(sub_cs);

   tu_cs_end(sub_cs);

   cs->cur = sub_cs->cur;

   struct tu_cs_entry entry = {
      .bo = tu_cs_current_bo(cs),
      .size = tu_cs_get_size(cs) * sizeof(uint32_t),
      .offset = tu_cs_get_offset(cs) * sizeof(uint32_t),
   };

   cs->start = cs->cur;

   return entry;
}

/**
 * Reserve space from a command stream for \a reserved_size uint32_t values.
 * This never fails when \a cs has mode TU_CS_MODE_EXTERNAL.
 */
VkResult
tu_cs_reserve_space(struct tu_cs *cs, uint32_t reserved_size)
{
   if (tu_cs_get_space(cs) < reserved_size) {
      if (cs->mode == TU_CS_MODE_EXTERNAL) {
         unreachable("cannot grow external buffer");
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }

      /* add an entry for the existing command packets */
      if (!tu_cs_is_empty(cs)) {
         /* no direct command packet for TU_CS_MODE_SUB_STREAM */
         assert(cs->mode != TU_CS_MODE_SUB_STREAM);

         tu_cs_add_entry(cs);
      }

      for (uint32_t i = 0; i < cs->cond_stack_depth; i++) {
         /* Subtract one here to account for the DWORD field itself. */
         *cs->cond_dwords[i] = cs->cur - cs->cond_dwords[i] - 1;

         /* space for CP_COND_REG_EXEC in next bo */
         reserved_size += 3;
      }

      /* switch to a new BO */
      uint32_t new_size = MAX2(cs->next_bo_size, reserved_size);
      VkResult result = tu_cs_add_bo(cs, new_size);
      if (result != VK_SUCCESS)
         return result;

      if (cs->cond_stack_depth) {
         cs->reserved_end = cs->cur + reserved_size;
      }

      /* Re-emit CP_COND_REG_EXECs */
      for (uint32_t i = 0; i < cs->cond_stack_depth; i++) {
         tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
         tu_cs_emit(cs, cs->cond_flags[i]);

         cs->cond_dwords[i] = cs->cur;

         /* Emit dummy DWORD field here */
         tu_cs_emit(cs, RENDER_MODE_CP_COND_REG_EXEC_1_DWORDS(0));
      }

      /* double the size for the next bo, also there is an upper
       * bound on IB size, which appears to be 0x0fffff dwords
       */
      new_size = MIN2(new_size << 1, 0x0fffff);
      if (cs->next_bo_size < new_size)
         cs->next_bo_size = new_size;
   }

   assert(tu_cs_get_space(cs) >= reserved_size);
   cs->reserved_end = cs->cur + reserved_size;

   if (cs->mode == TU_CS_MODE_GROW) {
      /* reserve an entry for the next call to this function or tu_cs_end */
      return tu_cs_reserve_entry(cs);
   }

   return VK_SUCCESS;
}
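
/* Example of the growth policy above: with next_bo_size = 2048 and a
 * 4096-dword reservation, the new BO gets 4096 dwords and next_bo_size
 * becomes 8192, doubling on each growth until it saturates at the
 * 0x0fffff-dword IB limit. */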

/**
 * Reset a command stream to its initial state. This discards all command
 * packets in \a cs, but does not necessarily release all resources.
 */
void
tu_cs_reset(struct tu_cs *cs)
{
   if (cs->mode == TU_CS_MODE_EXTERNAL) {
      assert(!cs->read_only.bo_count && !cs->read_write.bo_count &&
             !cs->refcount_bo && !cs->entry_count);
      cs->reserved_end = cs->cur = cs->start;
      return;
   }

   for (uint32_t i = 0; i + 1 < cs->read_only.bo_count; ++i) {
      tu_bo_finish(cs->device, cs->read_only.bos[i]);
   }

   for (uint32_t i = 0; i + 1 < cs->read_write.bo_count; ++i) {
      tu_bo_finish(cs->device, cs->read_write.bos[i]);
   }

   cs->writeable = false;

   if (cs->read_only.bo_count) {
      cs->read_only.bos[0] = cs->read_only.bos[cs->read_only.bo_count - 1];
      cs->read_only.bo_count = 1;

      cs->start = cs->cur = cs->reserved_end = (uint32_t *) cs->read_only.bos[0]->map;
      cs->end = cs->start + cs->read_only.bos[0]->size / sizeof(uint32_t);
   }

   if (cs->read_write.bo_count) {
      cs->read_write.bos[0] = cs->read_write.bos[cs->read_write.bo_count - 1];
      cs->read_write.bo_count = 1;
   }

   cs->entry_count = 0;
}

uint64_t
tu_cs_emit_data_nop(struct tu_cs *cs,
                    const uint32_t *data,
                    uint32_t size,
                    uint32_t align_dwords)
{
   uint32_t total_size = size + (align_dwords - 1);
   tu_cs_emit_pkt7(cs, CP_NOP, total_size);

   uint64_t iova = tu_cs_get_cur_iova(cs);
   uint64_t iova_aligned = align64(iova, align_dwords * sizeof(uint32_t));
   size_t offset = (iova_aligned - iova) / sizeof(uint32_t);
   cs->cur += offset;
   memcpy(cs->cur, data, size * sizeof(uint32_t));

   cs->cur += total_size - offset;

   return iova + offset * sizeof(uint32_t);
}
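
/* Example of the alignment math above: with align_dwords = 4 and a current
 * iova ending in 0x8, offset is 2 dwords, the payload lands on a 16-byte
 * boundary, and one trailing dword of the CP_NOP body is left as padding. */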

void
tu_cs_emit_debug_string(struct tu_cs *cs, const char *string, int len)
{
   assert(cs->mode == TU_CS_MODE_GROW);

   /* max packet size is 0x3fff dwords */
   len = MIN2(len, 0x3fff * 4);

   tu_cs_emit_pkt7(cs, CP_NOP, align(len, 4) / 4);
   const uint32_t *buf = (const uint32_t *) string;

   tu_cs_emit_array(cs, buf, len / 4);
   buf += len / 4;
   len = len % 4;

   /* copy remainder bytes without reading past the end of the input string */
   if (len > 0) {
      uint32_t w = 0;
      memcpy(&w, buf, len);
      tu_cs_emit(cs, w);
   }
}
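
/* Example: a 10-byte string emits a 3-dword CP_NOP body: two full dwords via
 * tu_cs_emit_array() plus one final dword holding the last 2 bytes with the
 * upper bytes zeroed. */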

void
tu_cs_emit_debug_magic_strv(struct tu_cs *cs,
                            uint32_t magic,
                            const char *fmt,
                            va_list args)
{
   /* measure with a copy of args: a va_list cannot be traversed twice */
   va_list args_copy;
   va_copy(args_copy, args);
   int fmt_len = vsnprintf(NULL, 0, fmt, args_copy);
   va_end(args_copy);

   int len = 4 + fmt_len + 1;
   char *string = (char *) malloc(len);

   /* format: <magic><formatted string>\0 */
   *(uint32_t *) string = magic;
   vsnprintf(string + 4, fmt_len + 1, fmt, args);

   tu_cs_emit_debug_string(cs, string, len);
   free(string);
}

__attribute__((format(printf, 2, 3))) void
tu_cs_emit_debug_msg(struct tu_cs *cs, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);
   tu_cs_emit_debug_magic_strv(cs, CP_NOP_MESG, fmt, args);
   va_end(args);
}
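
/* Hedged usage sketch: tu_cs_emit_debug_msg(cs, "draw %u", draw_id) drops a
 * CP_NOP-wrapped marker into the stream for debug tooling to pick out later. */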

void
tu_cs_trace_start(struct u_trace_context *utctx,
                  void *cs,
                  const char *fmt,
                  ...)
{
   va_list args;
   va_start(args, fmt);
   tu_cs_emit_debug_magic_strv((struct tu_cs *) cs, CP_NOP_BEGN, fmt, args);
   va_end(args);
}

void
tu_cs_trace_end(struct u_trace_context *utctx, void *cs, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);
   tu_cs_emit_debug_magic_strv((struct tu_cs *) cs, CP_NOP_END, fmt, args);
   va_end(args);
}