1 /*
2 * Copyright © 2019 Google LLC
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "tu_cs.h"
7
8 #include "tu_suballoc.h"
9
10 /**
11 * Initialize a command stream.
12 */
13 void
tu_cs_init(struct tu_cs * cs,struct tu_device * device,enum tu_cs_mode mode,uint32_t initial_size)14 tu_cs_init(struct tu_cs *cs,
15 struct tu_device *device,
16 enum tu_cs_mode mode,
17 uint32_t initial_size)
18 {
19 assert(mode != TU_CS_MODE_EXTERNAL);
20
21 memset(cs, 0, sizeof(*cs));
22
23 cs->device = device;
24 cs->mode = mode;
25 cs->next_bo_size = initial_size;
26 }
27
28 /**
29 * Initialize a command stream as a wrapper to an external buffer.
30 */
31 void
tu_cs_init_external(struct tu_cs * cs,struct tu_device * device,uint32_t * start,uint32_t * end)32 tu_cs_init_external(struct tu_cs *cs, struct tu_device *device,
33 uint32_t *start, uint32_t *end)
34 {
35 memset(cs, 0, sizeof(*cs));
36
37 cs->device = device;
38 cs->mode = TU_CS_MODE_EXTERNAL;
39 cs->start = cs->reserved_end = cs->cur = start;
40 cs->end = end;
41 }
42
43 /**
44 * Initialize a sub-command stream as a wrapper to an externally sub-allocated
45 * buffer.
46 */
47 void
tu_cs_init_suballoc(struct tu_cs * cs,struct tu_device * device,struct tu_suballoc_bo * suballoc_bo)48 tu_cs_init_suballoc(struct tu_cs *cs, struct tu_device *device,
49 struct tu_suballoc_bo *suballoc_bo)
50 {
51 uint32_t *start = tu_suballoc_bo_map(suballoc_bo);
52 uint32_t *end = start + (suballoc_bo->size >> 2);
53
54 memset(cs, 0, sizeof(*cs));
55 cs->device = device;
56 cs->mode = TU_CS_MODE_SUB_STREAM;
57 cs->start = cs->reserved_end = cs->cur = start;
58 cs->end = end;
59 cs->refcount_bo = tu_bo_get_ref(suballoc_bo->bo);
60 }
61
62 /**
63 * Finish and release all resources owned by a command stream.
64 */
65 void
tu_cs_finish(struct tu_cs * cs)66 tu_cs_finish(struct tu_cs *cs)
67 {
68 for (uint32_t i = 0; i < cs->bo_count; ++i) {
69 tu_bo_finish(cs->device, cs->bos[i]);
70 }
71
72 if (cs->refcount_bo)
73 tu_bo_finish(cs->device, cs->refcount_bo);
74
75 free(cs->entries);
76 free(cs->bos);
77 }
78
79 static struct tu_bo *
tu_cs_current_bo(const struct tu_cs * cs)80 tu_cs_current_bo(const struct tu_cs *cs)
81 {
82 if (cs->refcount_bo) {
83 return cs->refcount_bo;
84 } else {
85 assert(cs->bo_count);
86 return cs->bos[cs->bo_count - 1];
87 }
88 }
89
90 /**
91 * Get the offset of the command packets emitted since the last call to
92 * tu_cs_add_entry.
93 */
94 static uint32_t
tu_cs_get_offset(const struct tu_cs * cs)95 tu_cs_get_offset(const struct tu_cs *cs)
96 {
97 return cs->start - (uint32_t *) tu_cs_current_bo(cs)->map;
98 }
99
100 /*
101 * Allocate and add a BO to a command stream. Following command packets will
102 * be emitted to the new BO.
103 */
104 static VkResult
tu_cs_add_bo(struct tu_cs * cs,uint32_t size)105 tu_cs_add_bo(struct tu_cs *cs, uint32_t size)
106 {
107 /* no BO for TU_CS_MODE_EXTERNAL */
108 assert(cs->mode != TU_CS_MODE_EXTERNAL);
109 /* No adding more BOs if suballocating from a suballoc_bo. */
110 assert(!cs->refcount_bo);
111
112 /* no dangling command packet */
113 assert(tu_cs_is_empty(cs));
114
115 /* grow cs->bos if needed */
116 if (cs->bo_count == cs->bo_capacity) {
117 uint32_t new_capacity = MAX2(4, 2 * cs->bo_capacity);
118 struct tu_bo **new_bos =
119 realloc(cs->bos, new_capacity * sizeof(struct tu_bo *));
120 if (!new_bos)
121 return VK_ERROR_OUT_OF_HOST_MEMORY;
122
123 cs->bo_capacity = new_capacity;
124 cs->bos = new_bos;
125 }
126
127 struct tu_bo *new_bo;
128
129 VkResult result =
130 tu_bo_init_new(cs->device, &new_bo, size * sizeof(uint32_t),
131 TU_BO_ALLOC_GPU_READ_ONLY | TU_BO_ALLOC_ALLOW_DUMP);
132 if (result != VK_SUCCESS) {
133 return result;
134 }
135
136 result = tu_bo_map(cs->device, new_bo);
137 if (result != VK_SUCCESS) {
138 tu_bo_finish(cs->device, new_bo);
139 return result;
140 }
141
142 cs->bos[cs->bo_count++] = new_bo;
143
144 cs->start = cs->cur = cs->reserved_end = (uint32_t *) new_bo->map;
145 cs->end = cs->start + new_bo->size / sizeof(uint32_t);
146
147 return VK_SUCCESS;
148 }
149
150 /**
151 * Reserve an IB entry.
152 */
153 static VkResult
tu_cs_reserve_entry(struct tu_cs * cs)154 tu_cs_reserve_entry(struct tu_cs *cs)
155 {
156 /* entries are only for TU_CS_MODE_GROW */
157 assert(cs->mode == TU_CS_MODE_GROW);
158
159 /* grow cs->entries if needed */
160 if (cs->entry_count == cs->entry_capacity) {
161 uint32_t new_capacity = MAX2(4, cs->entry_capacity * 2);
162 struct tu_cs_entry *new_entries =
163 realloc(cs->entries, new_capacity * sizeof(struct tu_cs_entry));
164 if (!new_entries)
165 return VK_ERROR_OUT_OF_HOST_MEMORY;
166
167 cs->entry_capacity = new_capacity;
168 cs->entries = new_entries;
169 }
170
171 return VK_SUCCESS;
172 }
173
174 /**
175 * Add an IB entry for the command packets emitted since the last call to this
176 * function.
177 */
178 static void
tu_cs_add_entry(struct tu_cs * cs)179 tu_cs_add_entry(struct tu_cs *cs)
180 {
181 /* entries are only for TU_CS_MODE_GROW */
182 assert(cs->mode == TU_CS_MODE_GROW);
183
184 /* disallow empty entry */
185 assert(!tu_cs_is_empty(cs));
186
187 /*
188 * because we disallow empty entry, tu_cs_add_bo and tu_cs_reserve_entry
189 * must both have been called
190 */
191 assert(cs->bo_count);
192 assert(cs->entry_count < cs->entry_capacity);
193
194 /* add an entry for [cs->start, cs->cur] */
195 cs->entries[cs->entry_count++] = (struct tu_cs_entry) {
196 .bo = tu_cs_current_bo(cs),
197 .size = tu_cs_get_size(cs) * sizeof(uint32_t),
198 .offset = tu_cs_get_offset(cs) * sizeof(uint32_t),
199 };
200
201 cs->start = cs->cur;
202 }
203
204 /**
205 * same behavior as tu_cs_emit_call but without the indirect
206 */
207 VkResult
tu_cs_add_entries(struct tu_cs * cs,struct tu_cs * target)208 tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target)
209 {
210 VkResult result;
211
212 assert(cs->mode == TU_CS_MODE_GROW);
213 assert(target->mode == TU_CS_MODE_GROW);
214
215 if (!tu_cs_is_empty(cs))
216 tu_cs_add_entry(cs);
217
218 for (unsigned i = 0; i < target->entry_count; i++) {
219 result = tu_cs_reserve_entry(cs);
220 if (result != VK_SUCCESS)
221 return result;
222 cs->entries[cs->entry_count++] = target->entries[i];
223 }
224
225 return VK_SUCCESS;
226 }
227
228 /**
229 * Begin (or continue) command packet emission. This does nothing but sanity
230 * checks currently. \a cs must not be in TU_CS_MODE_SUB_STREAM mode.
231 */
232 void
tu_cs_begin(struct tu_cs * cs)233 tu_cs_begin(struct tu_cs *cs)
234 {
235 assert(cs->mode != TU_CS_MODE_SUB_STREAM);
236 assert(tu_cs_is_empty(cs));
237 }
238
239 /**
240 * End command packet emission. This adds an IB entry when \a cs is in
241 * TU_CS_MODE_GROW mode.
242 */
243 void
tu_cs_end(struct tu_cs * cs)244 tu_cs_end(struct tu_cs *cs)
245 {
246 assert(cs->mode != TU_CS_MODE_SUB_STREAM);
247
248 if (cs->mode == TU_CS_MODE_GROW && !tu_cs_is_empty(cs))
249 tu_cs_add_entry(cs);
250 }
251
252 /**
253 * Begin command packet emission to a sub-stream. \a cs must be in
254 * TU_CS_MODE_SUB_STREAM mode.
255 *
256 * Return \a sub_cs which is in TU_CS_MODE_EXTERNAL mode. tu_cs_begin and
257 * tu_cs_reserve_space are implied and \a sub_cs is ready for command packet
258 * emission.
259 */
260 VkResult
tu_cs_begin_sub_stream(struct tu_cs * cs,uint32_t size,struct tu_cs * sub_cs)261 tu_cs_begin_sub_stream(struct tu_cs *cs, uint32_t size, struct tu_cs *sub_cs)
262 {
263 assert(cs->mode == TU_CS_MODE_SUB_STREAM);
264 assert(size);
265
266 VkResult result = tu_cs_reserve_space(cs, size);
267 if (result != VK_SUCCESS)
268 return result;
269
270 tu_cs_init_external(sub_cs, cs->device, cs->cur, cs->reserved_end);
271 tu_cs_begin(sub_cs);
272 result = tu_cs_reserve_space(sub_cs, size);
273 assert(result == VK_SUCCESS);
274
275 return VK_SUCCESS;
276 }
277
278 /**
279 * Allocate count*size dwords, aligned to size dwords.
280 * \a cs must be in TU_CS_MODE_SUB_STREAM mode.
281 *
282 */
283 VkResult
tu_cs_alloc(struct tu_cs * cs,uint32_t count,uint32_t size,struct tu_cs_memory * memory)284 tu_cs_alloc(struct tu_cs *cs,
285 uint32_t count,
286 uint32_t size,
287 struct tu_cs_memory *memory)
288 {
289 assert(cs->mode == TU_CS_MODE_SUB_STREAM);
290 assert(size && size <= 1024);
291
292 if (!count)
293 return VK_SUCCESS;
294
295 /* TODO: smarter way to deal with alignment? */
296
297 VkResult result = tu_cs_reserve_space(cs, count * size + (size-1));
298 if (result != VK_SUCCESS)
299 return result;
300
301 struct tu_bo *bo = tu_cs_current_bo(cs);
302 size_t offset = align(tu_cs_get_offset(cs), size);
303
304 memory->map = bo->map + offset * sizeof(uint32_t);
305 memory->iova = bo->iova + offset * sizeof(uint32_t);
306
307 cs->start = cs->cur = (uint32_t*) bo->map + offset + count * size;
308
309 return VK_SUCCESS;
310 }
311
312 /**
313 * End command packet emission to a sub-stream. \a sub_cs becomes invalid
314 * after this call.
315 *
316 * Return an IB entry for the sub-stream. The entry has the same lifetime as
317 * \a cs.
318 */
319 struct tu_cs_entry
tu_cs_end_sub_stream(struct tu_cs * cs,struct tu_cs * sub_cs)320 tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs)
321 {
322 assert(cs->mode == TU_CS_MODE_SUB_STREAM);
323 assert(sub_cs->start == cs->cur && sub_cs->end == cs->reserved_end);
324 tu_cs_sanity_check(sub_cs);
325
326 tu_cs_end(sub_cs);
327
328 cs->cur = sub_cs->cur;
329
330 struct tu_cs_entry entry = {
331 .bo = tu_cs_current_bo(cs),
332 .size = tu_cs_get_size(cs) * sizeof(uint32_t),
333 .offset = tu_cs_get_offset(cs) * sizeof(uint32_t),
334 };
335
336 cs->start = cs->cur;
337
338 return entry;
339 }
340
341 /**
342 * Reserve space from a command stream for \a reserved_size uint32_t values.
343 * This never fails when \a cs has mode TU_CS_MODE_EXTERNAL.
344 */
345 VkResult
tu_cs_reserve_space(struct tu_cs * cs,uint32_t reserved_size)346 tu_cs_reserve_space(struct tu_cs *cs, uint32_t reserved_size)
347 {
348 if (tu_cs_get_space(cs) < reserved_size) {
349 if (cs->mode == TU_CS_MODE_EXTERNAL) {
350 unreachable("cannot grow external buffer");
351 return VK_ERROR_OUT_OF_HOST_MEMORY;
352 }
353
354 /* add an entry for the exiting command packets */
355 if (!tu_cs_is_empty(cs)) {
356 /* no direct command packet for TU_CS_MODE_SUB_STREAM */
357 assert(cs->mode != TU_CS_MODE_SUB_STREAM);
358
359 tu_cs_add_entry(cs);
360 }
361
362 for (uint32_t i = 0; i < cs->cond_stack_depth; i++) {
363 /* Subtract one here to account for the DWORD field itself. */
364 *cs->cond_dwords[i] = cs->cur - cs->cond_dwords[i] - 1;
365
366 /* space for CP_COND_REG_EXEC in next bo */
367 reserved_size += 3;
368 }
369
370 /* switch to a new BO */
371 uint32_t new_size = MAX2(cs->next_bo_size, reserved_size);
372 VkResult result = tu_cs_add_bo(cs, new_size);
373 if (result != VK_SUCCESS)
374 return result;
375
376 if (cs->cond_stack_depth) {
377 cs->reserved_end = cs->cur + reserved_size;
378 }
379
380 /* Re-emit CP_COND_REG_EXECs */
381 for (uint32_t i = 0; i < cs->cond_stack_depth; i++) {
382 tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
383 tu_cs_emit(cs, cs->cond_flags[i]);
384
385 cs->cond_dwords[i] = cs->cur;
386
387 /* Emit dummy DWORD field here */
388 tu_cs_emit(cs, CP_COND_REG_EXEC_1_DWORDS(0));
389 }
390
391 /* double the size for the next bo, also there is an upper
392 * bound on IB size, which appears to be 0x0fffff
393 */
394 new_size = MIN2(new_size << 1, 0x0fffff);
395 if (cs->next_bo_size < new_size)
396 cs->next_bo_size = new_size;
397 }
398
399 assert(tu_cs_get_space(cs) >= reserved_size);
400 cs->reserved_end = cs->cur + reserved_size;
401
402 if (cs->mode == TU_CS_MODE_GROW) {
403 /* reserve an entry for the next call to this function or tu_cs_end */
404 return tu_cs_reserve_entry(cs);
405 }
406
407 return VK_SUCCESS;
408 }
409
410 /**
411 * Reset a command stream to its initial state. This discards all comand
412 * packets in \a cs, but does not necessarily release all resources.
413 */
414 void
tu_cs_reset(struct tu_cs * cs)415 tu_cs_reset(struct tu_cs *cs)
416 {
417 if (cs->mode == TU_CS_MODE_EXTERNAL) {
418 assert(!cs->bo_count && !cs->refcount_bo && !cs->entry_count);
419 cs->reserved_end = cs->cur = cs->start;
420 return;
421 }
422
423 for (uint32_t i = 0; i + 1 < cs->bo_count; ++i) {
424 tu_bo_finish(cs->device, cs->bos[i]);
425 }
426
427 if (cs->bo_count) {
428 cs->bos[0] = cs->bos[cs->bo_count - 1];
429 cs->bo_count = 1;
430
431 cs->start = cs->cur = cs->reserved_end = (uint32_t *) cs->bos[0]->map;
432 cs->end = cs->start + cs->bos[0]->size / sizeof(uint32_t);
433 }
434
435 cs->entry_count = 0;
436 }
437