/*
 * Copyright © 2019 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "tu_cs.h"

#include "tu_suballoc.h"

/**
 * Initialize a command stream.
 */
void
tu_cs_init(struct tu_cs *cs,
           struct tu_device *device,
           enum tu_cs_mode mode,
           uint32_t initial_size, const char *name)
{
   assert(mode != TU_CS_MODE_EXTERNAL);

   memset(cs, 0, sizeof(*cs));

   cs->device = device;
   cs->mode = mode;
   cs->next_bo_size = initial_size;
   cs->name = name;
}
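
/* A minimal sketch of the TU_CS_MODE_GROW lifecycle (illustrative only;
 * error handling omitted, and "device" is assumed to be a valid
 * struct tu_device):
 *
 *    struct tu_cs cs;
 *    tu_cs_init(&cs, device, TU_CS_MODE_GROW, 4096, "example cs");
 *    tu_cs_begin(&cs);
 *    tu_cs_reserve_space(&cs, 2);
 *    tu_cs_emit_pkt7(&cs, CP_NOP, 1);
 *    tu_cs_emit(&cs, 0);
 *    tu_cs_end(&cs);
 *    ...the cs->entries built up here are what a submit consumes...
 *    tu_cs_finish(&cs);
 */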

/**
 * Initialize a command stream as a wrapper to an external buffer.
 */
void
tu_cs_init_external(struct tu_cs *cs, struct tu_device *device,
                    uint32_t *start, uint32_t *end, uint64_t iova,
                    bool writeable)
{
   memset(cs, 0, sizeof(*cs));

   cs->device = device;
   cs->mode = TU_CS_MODE_EXTERNAL;
   cs->start = cs->reserved_end = cs->cur = start;
   cs->end = end;
   cs->external_iova = iova;
   cs->writeable = writeable;
}

/**
 * Initialize a sub-command stream as a wrapper to an externally sub-allocated
 * buffer.
 */
void
tu_cs_init_suballoc(struct tu_cs *cs, struct tu_device *device,
                    struct tu_suballoc_bo *suballoc_bo)
{
   uint32_t *start = (uint32_t *) tu_suballoc_bo_map(suballoc_bo);
   uint32_t *end = start + (suballoc_bo->size >> 2);

   memset(cs, 0, sizeof(*cs));
   cs->device = device;
   cs->mode = TU_CS_MODE_SUB_STREAM;
   cs->start = cs->reserved_end = cs->cur = start;
   cs->end = end;
   cs->refcount_bo = tu_bo_get_ref(suballoc_bo->bo);
}

/**
 * Finish and release all resources owned by a command stream.
 */
void
tu_cs_finish(struct tu_cs *cs)
{
   for (uint32_t i = 0; i < cs->read_only.bo_count; ++i) {
      tu_bo_finish(cs->device, cs->read_only.bos[i]);
   }

   for (uint32_t i = 0; i < cs->read_write.bo_count; ++i) {
      tu_bo_finish(cs->device, cs->read_write.bos[i]);
   }

   if (cs->refcount_bo)
      tu_bo_finish(cs->device, cs->refcount_bo);

   free(cs->entries);
   free(cs->read_only.bos);
   free(cs->read_write.bos);
}

static struct tu_bo *
tu_cs_current_bo(const struct tu_cs *cs)
{
   if (cs->refcount_bo) {
      return cs->refcount_bo;
   } else {
      const struct tu_bo_array *bos = cs->writeable ? &cs->read_write : &cs->read_only;
      assert(bos->bo_count);
      return bos->bos[bos->bo_count - 1];
   }
}

/**
 * Get the offset of the command packets emitted since the last call to
 * tu_cs_add_entry.
 */
static uint32_t
tu_cs_get_offset(const struct tu_cs *cs)
{
   return cs->start - (uint32_t *) tu_cs_current_bo(cs)->map;
}

/* Get the iova for the next dword to be emitted. Useful after
 * tu_cs_reserve_space() to create a patch point that can be overwritten on
 * the GPU.
 */
uint64_t
tu_cs_get_cur_iova(const struct tu_cs *cs)
{
   if (cs->mode == TU_CS_MODE_EXTERNAL)
      return cs->external_iova + ((char *) cs->cur - (char *) cs->start);
   return tu_cs_current_bo(cs)->iova + ((char *) cs->cur - (char *) tu_cs_current_bo(cs)->map);
}
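
/* Sketch of the patch-point pattern mentioned above (illustrative only):
 *
 *    tu_cs_reserve_space(cs, 2);
 *    tu_cs_emit_pkt7(cs, CP_NOP, 1);
 *    uint64_t patch_iova = tu_cs_get_cur_iova(cs); // iova of the payload dword
 *    tu_cs_emit(cs, 0);                            // placeholder dword
 *
 * A later GPU-side write to patch_iova (e.g. via CP_MEM_WRITE) can then
 * replace the placeholder before the CP reaches it.
 */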

/*
 * Allocate and add a BO to a command stream.  Following command packets will
 * be emitted to the new BO.
 */
static VkResult
tu_cs_add_bo(struct tu_cs *cs, uint32_t size)
{
   /* no BO for TU_CS_MODE_EXTERNAL */
   assert(cs->mode != TU_CS_MODE_EXTERNAL);
   /* No adding more BOs if suballocating from a suballoc_bo. */
   assert(!cs->refcount_bo);

   /* no dangling command packet */
   assert(tu_cs_is_empty(cs));

   struct tu_bo_array *bos = cs->writeable ? &cs->read_write : &cs->read_only;

   /* grow cs->bos if needed */
   if (bos->bo_count == bos->bo_capacity) {
      uint32_t new_capacity = MAX2(4, 2 * bos->bo_capacity);
      struct tu_bo **new_bos = (struct tu_bo **)
         realloc(bos->bos, new_capacity * sizeof(struct tu_bo *));
      if (!new_bos)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      bos->bo_capacity = new_capacity;
      bos->bos = new_bos;
   }

   struct tu_bo *new_bo;

   VkResult result =
      tu_bo_init_new(cs->device, &new_bo, size * sizeof(uint32_t),
                     (enum tu_bo_alloc_flags)(COND(!cs->writeable,
                                                   TU_BO_ALLOC_GPU_READ_ONLY) |
                                              TU_BO_ALLOC_ALLOW_DUMP),
                     cs->name);
   if (result != VK_SUCCESS) {
      return result;
   }

   result = tu_bo_map(cs->device, new_bo);
   if (result != VK_SUCCESS) {
      tu_bo_finish(cs->device, new_bo);
      return result;
   }

   bos->bos[bos->bo_count++] = new_bo;

   cs->start = cs->cur = cs->reserved_end = (uint32_t *) new_bo->map;
   cs->end = cs->start + new_bo->size / sizeof(uint32_t);

   return VK_SUCCESS;
}

/**
 * Reserve an IB entry.
 */
static VkResult
tu_cs_reserve_entry(struct tu_cs *cs)
{
   /* entries are only for TU_CS_MODE_GROW */
   assert(cs->mode == TU_CS_MODE_GROW);

   /* grow cs->entries if needed */
   if (cs->entry_count == cs->entry_capacity) {
      uint32_t new_capacity = MAX2(4, cs->entry_capacity * 2);
      struct tu_cs_entry *new_entries = (struct tu_cs_entry *)
         realloc(cs->entries, new_capacity * sizeof(struct tu_cs_entry));
      if (!new_entries)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      cs->entry_capacity = new_capacity;
      cs->entries = new_entries;
   }

   return VK_SUCCESS;
}

/**
 * Add an IB entry for the command packets emitted since the last call to this
 * function.
 */
static void
tu_cs_add_entry(struct tu_cs *cs)
{
   /* entries are only for TU_CS_MODE_GROW */
   assert(cs->mode == TU_CS_MODE_GROW);

   /* disallow empty entry */
   assert(!tu_cs_is_empty(cs));

   /*
    * because we disallow empty entry, tu_cs_add_bo and tu_cs_reserve_entry
    * must both have been called
    */
   assert(cs->writeable ? cs->read_write.bo_count : cs->read_only.bo_count);
   assert(cs->entry_count < cs->entry_capacity);

   /* add an entry for [cs->start, cs->cur] */
   cs->entries[cs->entry_count++] = (struct tu_cs_entry) {
      .bo = tu_cs_current_bo(cs),
      .size = tu_cs_get_size(cs) * sizeof(uint32_t),
      .offset = tu_cs_get_offset(cs) * sizeof(uint32_t),
   };

   cs->start = cs->cur;
}

/**
 * Same behavior as tu_cs_emit_call, but without the indirection: the IB
 * entries of \a target are copied directly into \a cs.
 */
VkResult
tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target)
{
   VkResult result;

   assert(cs->mode == TU_CS_MODE_GROW);
   assert(target->mode == TU_CS_MODE_GROW);

   if (!tu_cs_is_empty(cs))
      tu_cs_add_entry(cs);

   for (unsigned i = 0; i < target->entry_count; i++) {
      result = tu_cs_reserve_entry(cs);
      if (result != VK_SUCCESS)
         return result;
      cs->entries[cs->entry_count++] = target->entries[i];
   }

   return VK_SUCCESS;
}

/**
 * Begin (or continue) command packet emission.  Currently this performs only
 * sanity checks.  \a cs must not be in TU_CS_MODE_SUB_STREAM mode.
 */
void
tu_cs_begin(struct tu_cs *cs)
{
   assert(cs->mode != TU_CS_MODE_SUB_STREAM);
   assert(tu_cs_is_empty(cs));
}

/**
 * End command packet emission.  This adds an IB entry when \a cs is in
 * TU_CS_MODE_GROW mode.
 */
void
tu_cs_end(struct tu_cs *cs)
{
   assert(cs->mode != TU_CS_MODE_SUB_STREAM);

   if (cs->mode == TU_CS_MODE_GROW && !tu_cs_is_empty(cs))
      tu_cs_add_entry(cs);
}

/**
 * Switch command packet emission between the read-only and read-write BO
 * sets.  In TU_CS_MODE_GROW this finishes the current IB entry; emission
 * then resumes where it previously left off in the target set.
 */
void
tu_cs_set_writeable(struct tu_cs *cs, bool writeable)
{
   assert(cs->mode == TU_CS_MODE_GROW || cs->mode == TU_CS_MODE_SUB_STREAM);

   if (cs->writeable != writeable) {
      if (cs->mode == TU_CS_MODE_GROW && !tu_cs_is_empty(cs))
         tu_cs_add_entry(cs);
      struct tu_bo_array *old_bos = cs->writeable ? &cs->read_write : &cs->read_only;
      struct tu_bo_array *new_bos = writeable ? &cs->read_write : &cs->read_only;

      old_bos->start = cs->start;
      cs->start = cs->cur = cs->reserved_end = new_bos->start;
      if (new_bos->bo_count) {
         struct tu_bo *bo = new_bos->bos[new_bos->bo_count - 1];
         cs->end = (uint32_t *) bo->map + bo->size / sizeof(uint32_t);
      } else {
         cs->end = NULL;
      }

      cs->writeable = writeable;
   }
}

/**
 * Begin command packet emission to a sub-stream.  \a cs must be in
 * TU_CS_MODE_SUB_STREAM mode.
 *
 * Return \a sub_cs which is in TU_CS_MODE_EXTERNAL mode.  tu_cs_begin and
 * tu_cs_reserve_space are implied and \a sub_cs is ready for command packet
 * emission.
 */
VkResult
tu_cs_begin_sub_stream(struct tu_cs *cs, uint32_t size, struct tu_cs *sub_cs)
{
   assert(cs->mode == TU_CS_MODE_SUB_STREAM);
   assert(size);

   VkResult result = tu_cs_reserve_space(cs, size);
   if (result != VK_SUCCESS)
      return result;

   tu_cs_init_external(sub_cs, cs->device, cs->cur, cs->reserved_end,
                       tu_cs_get_cur_iova(cs), cs->writeable);
   tu_cs_begin(sub_cs);
   result = tu_cs_reserve_space(sub_cs, size);
   assert(result == VK_SUCCESS);

   return VK_SUCCESS;
}
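
/* Sub-stream flow sketch (illustrative only): the parent cs hands out a
 * window of its current BO as an external cs, and tu_cs_end_sub_stream()
 * below turns whatever was emitted into an IB entry:
 *
 *    struct tu_cs sub_cs;
 *    if (tu_cs_begin_sub_stream(&cs, 4, &sub_cs) == VK_SUCCESS) {
 *       tu_cs_emit_pkt7(&sub_cs, CP_NOP, 3);
 *       tu_cs_emit(&sub_cs, 0);
 *       tu_cs_emit(&sub_cs, 0);
 *       tu_cs_emit(&sub_cs, 0);
 *       struct tu_cs_entry entry = tu_cs_end_sub_stream(&cs, &sub_cs);
 *       ...reference entry from another stream, e.g. via tu_cs_emit_ib()...
 *    }
 */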

/**
 * Allocate count*size dwords, aligned to size dwords.
 * \a cs must be in TU_CS_MODE_SUB_STREAM mode.
 */
VkResult
tu_cs_alloc(struct tu_cs *cs,
            uint32_t count,
            uint32_t size,
            struct tu_cs_memory *memory)
{
   assert(cs->mode == TU_CS_MODE_SUB_STREAM);
   assert(size && size <= 1024);

   if (!count) {
      /* If you allocated no memory, you'd better not use the iova for
       * anything (but it's left aligned for sanity).
       */
      memory->map = NULL;
      memory->iova = 0xdead0000;
      return VK_SUCCESS;
   }

   /* TODO: smarter way to deal with alignment? */

   VkResult result = tu_cs_reserve_space(cs, count * size + (size - 1));
   if (result != VK_SUCCESS)
      return result;

   struct tu_bo *bo = tu_cs_current_bo(cs);
   size_t offset = align(tu_cs_get_offset(cs), size);

   memory->map = (uint32_t *) bo->map + offset;
   memory->iova = bo->iova + offset * sizeof(uint32_t);
   memory->writeable = cs->writeable;

   cs->start = cs->cur = (uint32_t *) bo->map + offset + count * size;

   return VK_SUCCESS;
}
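
/* Example of the alignment contract above (illustrative only): a request
 * with count=2, size=4 returns 8 contiguous dwords whose start is aligned
 * to 4 dwords (16 bytes), so each size-4 element is itself aligned:
 *
 *    struct tu_cs_memory mem;
 *    if (tu_cs_alloc(&cs, 2, 4, &mem) == VK_SUCCESS) {
 *       mem.map[0] = value;                  // CPU-visible write
 *       ...reference mem.iova from a packet in another stream...
 *    }
 */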

/**
 * End command packet emission to a sub-stream.  \a sub_cs becomes invalid
 * after this call.
 *
 * Return an IB entry for the sub-stream.  The entry has the same lifetime as
 * \a cs.
 */
struct tu_cs_entry
tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs)
{
   assert(cs->mode == TU_CS_MODE_SUB_STREAM);
   assert(sub_cs->start == cs->cur && sub_cs->end == cs->reserved_end);
   tu_cs_sanity_check(sub_cs);

   tu_cs_end(sub_cs);

   cs->cur = sub_cs->cur;

   struct tu_cs_entry entry = {
      .bo = tu_cs_current_bo(cs),
      .size = tu_cs_get_size(cs) * sizeof(uint32_t),
      .offset = tu_cs_get_offset(cs) * sizeof(uint32_t),
   };

   cs->start = cs->cur;

   return entry;
}

/**
 * Reserve space from a command stream for \a reserved_size uint32_t values.
 * This never fails when \a cs has mode TU_CS_MODE_EXTERNAL.
 */
VkResult
tu_cs_reserve_space(struct tu_cs *cs, uint32_t reserved_size)
{
   if (tu_cs_get_space(cs) < reserved_size) {
      if (cs->mode == TU_CS_MODE_EXTERNAL) {
         unreachable("cannot grow external buffer");
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }

      /* add an entry for the existing command packets */
      if (!tu_cs_is_empty(cs)) {
         /* no direct command packet for TU_CS_MODE_SUB_STREAM */
         assert(cs->mode != TU_CS_MODE_SUB_STREAM);

         tu_cs_add_entry(cs);
      }

      for (uint32_t i = 0; i < cs->cond_stack_depth; i++) {
         /* Subtract one here to account for the DWORD field itself. */
         *cs->cond_dwords[i] = cs->cur - cs->cond_dwords[i] - 1;

         /* space for CP_COND_REG_EXEC in next bo */
         reserved_size += 3;
      }

      /* switch to a new BO */
      uint32_t new_size = MAX2(cs->next_bo_size, reserved_size);
      VkResult result = tu_cs_add_bo(cs, new_size);
      if (result != VK_SUCCESS)
         return result;

      if (cs->cond_stack_depth) {
         cs->reserved_end = cs->cur + reserved_size;
      }

      /* Re-emit CP_COND_REG_EXECs */
      for (uint32_t i = 0; i < cs->cond_stack_depth; i++) {
         tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
         tu_cs_emit(cs, cs->cond_flags[i]);

         cs->cond_dwords[i] = cs->cur;

         /* Emit dummy DWORD field here */
         tu_cs_emit(cs, RENDER_MODE_CP_COND_REG_EXEC_1_DWORDS(0));
      }

      /* double the size for the next bo, also there is an upper
       * bound on IB size, which appears to be 0x0fffff
       */
      new_size = MIN2(new_size << 1, 0x0fffff);
      if (cs->next_bo_size < new_size)
         cs->next_bo_size = new_size;
   }

   assert(tu_cs_get_space(cs) >= reserved_size);
   cs->reserved_end = cs->cur + reserved_size;

   if (cs->mode == TU_CS_MODE_GROW) {
      /* reserve an entry for the next call to this function or tu_cs_end */
      return tu_cs_reserve_entry(cs);
   }

   return VK_SUCCESS;
}
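
/* Typical emit pattern around tu_cs_reserve_space() (illustrative only):
 * reserve the worst-case dword count once, then emit without further
 * checks, since the reservation guarantees the packets fit in one BO:
 *
 *    if (tu_cs_reserve_space(cs, 5) == VK_SUCCESS) {
 *       tu_cs_emit_pkt7(cs, CP_NOP, 4);  // 1 header dword + 4 payload dwords
 *       tu_cs_emit(cs, 0);
 *       tu_cs_emit(cs, 0);
 *       tu_cs_emit(cs, 0);
 *       tu_cs_emit(cs, 0);
 *    }
 */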

/**
 * Reset a command stream to its initial state.  This discards all command
 * packets in \a cs, but does not necessarily release all resources.
 */
void
tu_cs_reset(struct tu_cs *cs)
{
   if (cs->mode == TU_CS_MODE_EXTERNAL) {
      assert(!cs->read_only.bo_count && !cs->read_write.bo_count &&
             !cs->refcount_bo && !cs->entry_count);
      cs->reserved_end = cs->cur = cs->start;
      return;
   }

   for (uint32_t i = 0; i + 1 < cs->read_only.bo_count; ++i) {
      tu_bo_finish(cs->device, cs->read_only.bos[i]);
   }

   for (uint32_t i = 0; i + 1 < cs->read_write.bo_count; ++i) {
      tu_bo_finish(cs->device, cs->read_write.bos[i]);
   }

   cs->writeable = false;

   if (cs->read_only.bo_count) {
      cs->read_only.bos[0] = cs->read_only.bos[cs->read_only.bo_count - 1];
      cs->read_only.bo_count = 1;

      cs->start = cs->cur = cs->reserved_end = (uint32_t *) cs->read_only.bos[0]->map;
      cs->end = cs->start + cs->read_only.bos[0]->size / sizeof(uint32_t);
   }

   if (cs->read_write.bo_count) {
      cs->read_write.bos[0] = cs->read_write.bos[cs->read_write.bo_count - 1];
      cs->read_write.bo_count = 1;
   }

   cs->entry_count = 0;
}

/**
 * Copy \a size dwords of \a data into the command stream as the payload of a
 * CP_NOP packet, aligned to \a align_dwords dwords.  Return the iova of the
 * aligned copy.
 */
uint64_t
tu_cs_emit_data_nop(struct tu_cs *cs,
                    const uint32_t *data,
                    uint32_t size,
                    uint32_t align_dwords)
{
   uint32_t total_size = size + (align_dwords - 1);
   tu_cs_emit_pkt7(cs, CP_NOP, total_size);

   uint64_t iova = tu_cs_get_cur_iova(cs);
   uint64_t iova_aligned = align64(iova, align_dwords * sizeof(uint32_t));
   size_t offset = (iova_aligned - iova) / sizeof(uint32_t);
   cs->cur += offset;
   memcpy(cs->cur, data, size * sizeof(uint32_t));

   cs->cur += total_size - offset;

   return iova + offset * sizeof(uint32_t);
}
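
/* Worked example of the alignment arithmetic above (illustrative only):
 * with size=4 and align_dwords=4, total_size is 4 + 3 = 7 payload dwords.
 * If the payload would start at iova 0x...08, align64() rounds it up to
 * 0x...10, so offset is (0x10 - 0x08) / 4 = 2 dwords; the data lands
 * 16-byte aligned and the 2 + 1 leftover padding dwords stay inside the
 * CP_NOP, where the CP simply skips them.
 */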

void
tu_cs_emit_debug_string(struct tu_cs *cs, const char *string, int len)
{
   assert(cs->mode == TU_CS_MODE_GROW);

   /* max packet size is 0x3fff dwords */
   len = MIN2(len, 0x3fff * 4);

   tu_cs_emit_pkt7(cs, CP_NOP, align(len, 4) / 4);
   const uint32_t *buf = (const uint32_t *) string;

   tu_cs_emit_array(cs, buf, len / 4);
   buf += len / 4;
   len = len % 4;

   /* copy remainder bytes without reading past the end of the input string */
   if (len > 0) {
      uint32_t w = 0;
      memcpy(&w, buf, len);
      tu_cs_emit(cs, w);
   }
}

void
tu_cs_emit_debug_magic_strv(struct tu_cs *cs,
                            uint32_t magic,
                            const char *fmt,
                            va_list args)
{
   /* Measure first, then format.  vsnprintf consumes the va_list, so take a
    * copy for the second pass.
    */
   va_list args_copy;
   va_copy(args_copy, args);
   int fmt_len = vsnprintf(NULL, 0, fmt, args);
   int len = 4 + fmt_len + 1;
   char *string = (char *) malloc(len);

   /* format: <magic><formatted string>\0 */
   *(uint32_t *) string = magic;
   vsnprintf(string + 4, fmt_len + 1, fmt, args_copy);
   va_end(args_copy);

   tu_cs_emit_debug_string(cs, string, len);
   free(string);
}

__attribute__((format(printf, 2, 3))) void
tu_cs_emit_debug_msg(struct tu_cs *cs, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);
   tu_cs_emit_debug_magic_strv(cs, CP_NOP_MESG, fmt, args);
   va_end(args);
}
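
/* Usage sketch (illustrative only): the message is embedded as a tagged
 * CP_NOP payload, so it is ignored by the GPU but can be recovered from a
 * command stream dump by decode tooling:
 *
 *    tu_cs_emit_debug_msg(cs, "draw %u of %u", i, count);
 */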

void
tu_cs_trace_start(struct u_trace_context *utctx,
                  void *cs,
                  const char *fmt,
                  ...)
{
   va_list args;
   va_start(args, fmt);
   tu_cs_emit_debug_magic_strv((struct tu_cs *) cs, CP_NOP_BEGN, fmt, args);
   va_end(args);
}

void
tu_cs_trace_end(struct u_trace_context *utctx, void *cs, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);
   tu_cs_emit_debug_magic_strv((struct tu_cs *) cs, CP_NOP_END, fmt, args);
   va_end(args);
}