1 /*
2 * Copyright © 2019 Google LLC
3 * SPDX-License-Identifier: MIT
4 */
5
6 #ifndef TU_CS_H
7 #define TU_CS_H
8
9 #include "tu_common.h"
10
11 #include "freedreno_pm4.h"
12
13 #include "tu_knl.h"
14
15 /* For breadcrumbs we may open a network socket based on the envvar,
16 * it's not something that should be enabled by default.
17 */
18 #define TU_BREADCRUMBS_ENABLED 0
19
/* Operating mode of a command stream; chosen at tu_cs_init() time and
 * checked by many of the helpers below.
 */
enum tu_cs_mode
{

   /*
    * A command stream in TU_CS_MODE_GROW mode grows automatically whenever it
    * is full. tu_cs_begin must be called before command packet emission and
    * tu_cs_end must be called after.
    *
    * This mode may create multiple entries internally. The entries must be
    * submitted together.
    */
   TU_CS_MODE_GROW,

   /*
    * A command stream in TU_CS_MODE_EXTERNAL mode wraps an external,
    * fixed-size buffer. tu_cs_begin and tu_cs_end are optional and have no
    * effect on it.
    *
    * This mode does not create any entry or any BO.
    */
   TU_CS_MODE_EXTERNAL,

   /*
    * A command stream in TU_CS_MODE_SUB_STREAM mode does not support direct
    * command packet emission. tu_cs_begin_sub_stream must be called to get a
    * sub-stream to emit command packets to. When done with the sub-stream,
    * tu_cs_end_sub_stream must be called.
    *
    * This mode does not create any entry internally.
    */
   TU_CS_MODE_SUB_STREAM,
};
52
/* One submittable chunk of a command stream: a byte range inside a BO.
 * Consumed by tu_cs_emit_ib() / tu_cs_emit_call().
 */
struct tu_cs_entry
{
   /* No ownership */
   const struct tu_bo *bo;

   /* Byte size of the chunk; dword-aligned (asserted in tu_cs_emit_ib) */
   uint32_t size;
   /* Byte offset of the chunk within the BO */
   uint32_t offset;
};
61
/* A CPU-mapped, GPU-visible allocation returned by tu_cs_alloc(). */
struct tu_cs_memory {
   /* CPU pointer to the allocation */
   uint32_t *map;
   /* GPU address of the allocation */
   uint64_t iova;
   bool writeable;
};
67
/* A draw-state group: GPU address plus size in dwords (see
 * tu_cs_end_draw_state(), which divides the byte size by sizeof(uint32_t)).
 */
struct tu_draw_state {
   uint64_t iova;
   /* Size in dwords */
   uint16_t size;
   bool writeable;
};
73
/* Growable array of BOs backing a command stream (one array each for
 * read-only and read-write BOs in struct tu_cs).
 */
struct tu_bo_array {
   struct tu_bo **bos;
   uint32_t bo_count;
   uint32_t bo_capacity;
   /* NOTE(review): presumably per-BO start offsets/cursor state — confirm in tu_cs.c */
   uint32_t *start;
};
80
/* Maximum nesting depth of tu_cond_exec_start()/tu_cond_exec_end() pairs. */
#define TU_COND_EXEC_STACK_SIZE 4

struct tu_cs
{
   /* Dword window of the current BO:
    *   start        - first dword of the current chunk
    *   cur          - write cursor (tu_cs_emit stores here)
    *   reserved_end - end of the space guaranteed by the last reserve call;
    *                  tu_cs_emit asserts cur < reserved_end
    *   end          - end of the BO; reserved_end <= end (tu_cs_sanity_check)
    */
   uint32_t *start;
   uint32_t *cur;
   uint32_t *reserved_end;
   uint32_t *end;
   /* Debug name passed to tu_cs_init() */
   const char *name;

   struct tu_device *device;
   enum tu_cs_mode mode;
   bool writeable;
   /* Size used for the next BO allocation — TODO confirm growth policy in tu_cs.c */
   uint32_t next_bo_size;

   /* Finished chunks for TU_CS_MODE_GROW (see tu_cs_emit_call) */
   struct tu_cs_entry *entries;
   uint32_t entry_count;
   uint32_t entry_capacity;

   struct tu_bo_array read_only, read_write;

   /* Optional BO that this CS is sub-allocated from for TU_CS_MODE_SUB_STREAM */
   struct tu_bo *refcount_bo;

   /* iova that this CS starts with in TU_CS_MODE_EXTERNAL */
   uint64_t external_iova;

   /* state for cond_exec_start/cond_exec_end */
   uint32_t cond_stack_depth;
   uint32_t cond_flags[TU_COND_EXEC_STACK_SIZE];
   /* Location of each pending packet's DWORDS field, patched at cond_exec_end */
   uint32_t *cond_dwords[TU_COND_EXEC_STACK_SIZE];

   /* Countdown of dwords until the next synchronous breadcrumb is emitted
    * (only used when TU_BREADCRUMBS_ENABLED).
    */
   uint32_t breadcrumb_emit_after;
};
115
void
tu_breadcrumbs_init(struct tu_device *device);

void
tu_breadcrumbs_finish(struct tu_device *device);

/* Initialize a CS in the given mode; GROW/SUB_STREAM allocate BOs lazily. */
void
tu_cs_init(struct tu_cs *cs,
           struct tu_device *device,
           enum tu_cs_mode mode,
           uint32_t initial_size, const char *name);

/* Initialize a TU_CS_MODE_EXTERNAL CS wrapping [start, end) at the given iova. */
void
tu_cs_init_external(struct tu_cs *cs, struct tu_device *device,
                    uint32_t *start, uint32_t *end, uint64_t iova,
                    bool writeable);

/* Initialize a CS sub-allocated from an existing suballoc BO. */
void
tu_cs_init_suballoc(struct tu_cs *cs, struct tu_device *device,
                    struct tu_suballoc_bo *bo);

/* Release all resources owned by the CS. */
void
tu_cs_finish(struct tu_cs *cs);

/* Begin/end command packet emission (required for TU_CS_MODE_GROW). */
void
tu_cs_begin(struct tu_cs *cs);

void
tu_cs_end(struct tu_cs *cs);

void
tu_cs_set_writeable(struct tu_cs *cs, bool writeable);

/* Get a fixed-size sub-stream from a TU_CS_MODE_SUB_STREAM CS; pair with
 * tu_cs_end_sub_stream().
 */
VkResult
tu_cs_begin_sub_stream(struct tu_cs *cs, uint32_t size, struct tu_cs *sub_cs);

/* Allocate count*size dwords of GPU-visible memory from a sub-stream CS. */
VkResult
tu_cs_alloc(struct tu_cs *cs,
            uint32_t count,
            uint32_t size,
            struct tu_cs_memory *memory);

/* Finish a sub-stream and return the entry describing what was emitted. */
struct tu_cs_entry
tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs);
160
161 static inline struct tu_draw_state
tu_cs_end_draw_state(struct tu_cs * cs,struct tu_cs * sub_cs)162 tu_cs_end_draw_state(struct tu_cs *cs, struct tu_cs *sub_cs)
163 {
164 struct tu_cs_entry entry = tu_cs_end_sub_stream(cs, sub_cs);
165 return (struct tu_draw_state) {
166 .iova = entry.bo->iova + entry.offset,
167 .size = entry.size / sizeof(uint32_t),
168 .writeable = sub_cs->writeable,
169 };
170 }
171
/* Grow/allocate so that reserved_size dwords can be emitted; slow path of
 * tu_cs_reserve().
 */
VkResult
tu_cs_reserve_space(struct tu_cs *cs, uint32_t reserved_size);

/* GPU address corresponding to the current write cursor. */
uint64_t
tu_cs_get_cur_iova(const struct tu_cs *cs);
177
/* Allocate `size` dwords from sub_cs and initialize `cs` as an external CS
 * wrapping that allocation, ready for emission. Returns the draw state
 * covering the full allocation.
 */
static inline struct tu_draw_state
tu_cs_draw_state(struct tu_cs *sub_cs, struct tu_cs *cs, uint32_t size)
{
   struct tu_cs_memory memory;

   /* TODO: clean this up */
   /* NOTE(review): the VkResult of tu_cs_alloc is ignored here; on failure
    * `memory` would be used uninitialized (see TODO above).
    */
   tu_cs_alloc(sub_cs, size, 1, &memory);
   tu_cs_init_external(cs, sub_cs->device, memory.map, memory.map + size,
                       memory.iova, memory.writeable);
   tu_cs_begin(cs);
   tu_cs_reserve_space(cs, size);

   return (struct tu_draw_state) {
      .iova = memory.iova,
      .size = size,
      .writeable = sub_cs->writeable,
   };
}
196
/* Reset the CS for reuse, discarding emitted packets. */
void
tu_cs_reset(struct tu_cs *cs);

/* Move/copy the entries of `target` into `cs` (both TU_CS_MODE_GROW). */
VkResult
tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target);
202
203 /**
204 * Get the size of the command packets emitted since the last call to
205 * tu_cs_add_entry.
206 */
207 static inline uint32_t
tu_cs_get_size(const struct tu_cs * cs)208 tu_cs_get_size(const struct tu_cs *cs)
209 {
210 return cs->cur - cs->start;
211 }
212
213 /**
214 * Return true if there is no command packet emitted since the last call to
215 * tu_cs_add_entry.
216 */
217 static inline uint32_t
tu_cs_is_empty(const struct tu_cs * cs)218 tu_cs_is_empty(const struct tu_cs *cs)
219 {
220 return tu_cs_get_size(cs) == 0;
221 }
222
/**
 * Discard all entries. This allows \a cs to be reused while keeping the
 * existing BOs and command packets intact.
 */
static inline void
tu_cs_discard_entries(struct tu_cs *cs)
{
   /* Entries only exist in GROW mode. */
   assert(cs->mode == TU_CS_MODE_GROW);
   cs->entry_count = 0;
}
233
/**
 * Get the size, in dwords, needed for tu_cs_emit_call: one
 * CP_INDIRECT_BUFFER packet per entry.
 */
static inline uint32_t
tu_cs_get_call_size(const struct tu_cs *cs)
{
   assert(cs->mode == TU_CS_MODE_GROW);
   /* each CP_INDIRECT_BUFFER needs 4 dwords */
   return cs->entry_count * 4;
}
244
/**
 * Assert that we did not exceed the reserved space:
 * start <= cur <= reserved_end <= end must always hold.
 */
static inline void
tu_cs_sanity_check(const struct tu_cs *cs)
{
   assert(cs->start <= cs->cur);
   assert(cs->cur <= cs->reserved_end);
   assert(cs->reserved_end <= cs->end);
}
255
/* Emit a synchronous breadcrumb for debugging GPU hangs (see
 * TU_BREADCRUMBS_ENABLED).
 */
void
tu_cs_emit_sync_breadcrumb(struct tu_cs *cs, uint8_t opcode, uint16_t cnt);
258
/**
 * Emit a uint32_t value into a command stream, without boundary checking
 * (the caller must have reserved space via tu_cs_reserve()).
 */
static inline void
tu_cs_emit(struct tu_cs *cs, uint32_t value)
{
   assert(cs->cur < cs->reserved_end);
   *cs->cur = value;
   ++cs->cur;

#if TU_BREADCRUMBS_ENABLED
   /* Count down dwords and emit a breadcrumb when the counter hits zero.
    * -1 is converted to 0xff by the uint8_t parameter — presumably a "no
    * opcode" sentinel; confirm in tu_cs_emit_sync_breadcrumb.
    */
   cs->breadcrumb_emit_after--;
   if (cs->breadcrumb_emit_after == 0)
      tu_cs_emit_sync_breadcrumb(cs, -1, 0);
#endif
}
275
276 /**
277 * Emit an array of uint32_t into a command stream, without boundary checking.
278 */
279 static inline void
tu_cs_emit_array(struct tu_cs * cs,const uint32_t * values,uint32_t length)280 tu_cs_emit_array(struct tu_cs *cs, const uint32_t *values, uint32_t length)
281 {
282 assert(cs->cur + length <= cs->reserved_end);
283 memcpy(cs->cur, values, sizeof(uint32_t) * length);
284 cs->cur += length;
285 }
286
287 /**
288 * Get the size of the remaining space in the current BO.
289 */
290 static inline uint32_t
tu_cs_get_space(const struct tu_cs * cs)291 tu_cs_get_space(const struct tu_cs *cs)
292 {
293 return cs->end - cs->cur;
294 }
295
/* Guarantee that at least `reserved_size` dwords can be emitted with
 * tu_cs_emit() without further checks. Only GROW-mode streams actually
 * grow; other modes just assert there is room.
 */
static inline void
tu_cs_reserve(struct tu_cs *cs, uint32_t reserved_size)
{
   if (cs->mode != TU_CS_MODE_GROW) {
      assert(tu_cs_get_space(cs) >= reserved_size);
      assert(cs->reserved_end == cs->end);
      return;
   }

   /* Fast path: enough room in the current BO and a free entry slot. */
   if (tu_cs_get_space(cs) >= reserved_size &&
       cs->entry_count < cs->entry_capacity) {
      cs->reserved_end = cs->cur + reserved_size;
      return;
   }

   /* Slow path: grow (allocate a new BO and/or entry). */
   ASSERTED VkResult result = tu_cs_reserve_space(cs, reserved_size);
   /* TODO: set this error in tu_cs and use it */
   assert(result == VK_SUCCESS);
}
315
/**
 * Emit a type-4 (register write) command packet header into a command
 * stream, reserving space for the header plus `cnt` payload dwords.
 */
static inline void
tu_cs_emit_pkt4(struct tu_cs *cs, uint16_t regindx, uint16_t cnt)
{
   tu_cs_reserve(cs, cnt + 1);
   tu_cs_emit(cs, pm4_pkt4_hdr(regindx, cnt));
}
325
/**
 * Emit a type-7 (opcode) command packet header into a command stream,
 * reserving space for the header plus `cnt` payload dwords.
 */
static inline void
tu_cs_emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
{
#if TU_BREADCRUMBS_ENABLED
   /* Record the upcoming packet (header + payload) for hang debugging. */
   tu_cs_emit_sync_breadcrumb(cs, opcode, cnt + 1);
#endif

   tu_cs_reserve(cs, cnt + 1);
   tu_cs_emit(cs, pm4_pkt7_hdr(opcode, cnt));
}
339
/* Emit a CP_WAIT_FOR_IDLE packet (no payload). */
static inline void
tu_cs_emit_wfi(struct tu_cs *cs)
{
   tu_cs_emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
}
345
/* Emit a 64-bit value as two dwords, low half first. */
static inline void
tu_cs_emit_qw(struct tu_cs *cs, uint64_t value)
{
   const uint32_t lo = (uint32_t)value;
   const uint32_t hi = (uint32_t)(value >> 32);

   tu_cs_emit(cs, lo);
   tu_cs_emit(cs, hi);
}
352
/* Write a single register via a type-4 packet. */
static inline void
tu_cs_emit_write_reg(struct tu_cs *cs, uint16_t reg, uint32_t value)
{
   tu_cs_emit_pkt4(cs, reg, 1);
   tu_cs_emit(cs, value);
}
359
/**
 * Emit a CP_INDIRECT_BUFFER command packet: GPU address (two dwords)
 * followed by the target size in dwords.
 */
static inline void
tu_cs_emit_ib(struct tu_cs *cs, const struct tu_cs_entry *entry)
{
   assert(entry->bo);
   assert(entry->size && entry->offset + entry->size <= entry->bo->size);
   /* size and offset are in bytes but must be dword-aligned */
   assert(entry->size % sizeof(uint32_t) == 0);
   assert(entry->offset % sizeof(uint32_t) == 0);

   tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3);
   tu_cs_emit_qw(cs, entry->bo->iova + entry->offset);
   tu_cs_emit(cs, entry->size / sizeof(uint32_t));
}
375
/* for compute which isn't using SET_DRAW_STATE */
/* Emit a CP_INDIRECT_BUFFER to a draw state; a no-op for empty states
 * (state.size is already in dwords, unlike tu_cs_entry::size).
 */
static inline void
tu_cs_emit_state_ib(struct tu_cs *cs, struct tu_draw_state state)
{
   if (state.size) {
      tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3);
      tu_cs_emit_qw(cs, state.iova);
      tu_cs_emit(cs, state.size);
   }
}
386
387 /**
388 * Emit a CP_INDIRECT_BUFFER command packet for each entry in the target
389 * command stream.
390 */
391 static inline void
tu_cs_emit_call(struct tu_cs * cs,const struct tu_cs * target)392 tu_cs_emit_call(struct tu_cs *cs, const struct tu_cs *target)
393 {
394 assert(target->mode == TU_CS_MODE_GROW);
395 for (uint32_t i = 0; i < target->entry_count; i++)
396 tu_cs_emit_ib(cs, target->entries + i);
397 }
398
/**
 * Emit a CP_NOP with a string tail into the command stream.
 */
void
tu_cs_emit_debug_string(struct tu_cs *cs, const char *string, int len);

/* Format and emit a magic-tagged debug string (vprintf-style). */
void
tu_cs_emit_debug_magic_strv(struct tu_cs *cs,
                            uint32_t magic,
                            const char *fmt,
                            va_list args);

/* printf-style wrapper around tu_cs_emit_debug_magic_strv. */
__attribute__((format(printf, 2, 3))) void
tu_cs_emit_debug_msg(struct tu_cs *cs, const char *fmt, ...);
413
/**
 * Emit a single message into the CS that denotes the calling function and
 * any optional printf-style parameters, only when utrace markers are
 * enabled (checked at runtime, so the macro is cheap otherwise).
 */
#define TU_CS_DEBUG_MSG(CS, FORMAT_STRING, ...)                              \
   do {                                                                      \
      if (unlikely(u_trace_markers_enabled(&(CS)->device->trace_context)))   \
         tu_cs_emit_debug_msg(CS, "%s(" FORMAT_STRING ")", __func__,         \
                              ##__VA_ARGS__);                                \
   } while (0)
424
/* Opaque scope handle used by the utrace integration. */
typedef struct tu_cs *tu_debug_scope;

/* Begin/end a utrace span on the given CS (cs is passed as void* to match
 * the u_trace callback signature).
 */
__attribute__((format(printf, 3, 4))) void
tu_cs_trace_start(struct u_trace_context *utctx,
                  void *cs,
                  const char *fmt,
                  ...);

__attribute__((format(printf, 3, 4))) void
tu_cs_trace_end(struct u_trace_context *utctx, void *cs, const char *fmt, ...);
435
/* Helpers for bracketing a large sequence of commands of unknown size inside
 * a CP_COND_REG_EXEC packet.
 */
static inline void
tu_cond_exec_start(struct tu_cs *cs, uint32_t cond_flags)
{
   /* Growing mid-packet would split the packet across BOs, so only GROW
    * streams with bounded nesting are supported.
    */
   assert(cs->mode == TU_CS_MODE_GROW);
   assert(cs->cond_stack_depth < TU_COND_EXEC_STACK_SIZE);

   /* Only these compare modes are expected by the matching cond_exec_end. */
   ASSERTED enum compare_mode mode =
      (enum compare_mode)((cond_flags & CP_COND_REG_EXEC_0_MODE__MASK) >>
                          CP_COND_REG_EXEC_0_MODE__SHIFT);
   assert(mode == PRED_TEST || mode == RENDER_MODE || mode == THREAD_MODE);

   tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
   tu_cs_emit(cs, cond_flags);

   /* Remember where the DWORDS field lives so tu_cond_exec_end can patch
    * in the real skip length.
    */
   cs->cond_flags[cs->cond_stack_depth] = cond_flags;
   cs->cond_dwords[cs->cond_stack_depth] = cs->cur;

   /* Emit dummy DWORD field here */
   tu_cs_emit(cs, RENDER_MODE_CP_COND_REG_EXEC_1_DWORDS(0));

   cs->cond_stack_depth++;
}
/* Convenience cond_flags for the common GMEM-vs-sysmem render-mode split. */
#define CP_COND_EXEC_0_RENDER_MODE_GMEM                                      \
   (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_GMEM)
#define CP_COND_EXEC_0_RENDER_MODE_SYSMEM                                    \
   (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_SYSMEM)
465
/* Close the innermost tu_cond_exec_start() bracket by patching the packet's
 * DWORDS field with the number of dwords emitted in between.
 */
static inline void
tu_cond_exec_end(struct tu_cs *cs)
{
   assert(cs->cond_stack_depth > 0);
   cs->cond_stack_depth--;

   cs->cond_flags[cs->cond_stack_depth] = 0;
   /* Subtract one here to account for the DWORD field itself. */
   uint32_t cond_len = cs->cur - cs->cond_dwords[cs->cond_stack_depth] - 1;
   if (cond_len) {
      *cs->cond_dwords[cs->cond_stack_depth] = cond_len;
   } else {
      /* rewind the CS to drop the empty cond reg packet: the pkt7 header,
       * the flags dword, and the dummy DWORDS field (3 dwords total).
       */
      cs->cur = cs->cur - 3;
   }
}
482
/* Embed raw data in the CS inside a CP_NOP packet; returns the GPU address
 * of the data.
 */
uint64_t
tu_cs_emit_data_nop(struct tu_cs *cs,
                    const uint32_t *data,
                    uint32_t size,
                    uint32_t align);
488
/* Temporary struct for tracking a register state to be written, used by
 * a6xx-pack.h and tu_cs_emit_regs()
 */
struct tu_reg_value {
   uint32_t reg;
   /* Immediate value, or bits OR'd into the BO address (see __ONE_REG) */
   uint64_t value;
   /* If set, the register is written with bo->iova + bo_offset */
   struct tu_bo *bo;
   /* True for 64-bit address registers (two dwords written) */
   bool is_address;
   bool bo_write;
   uint32_t bo_offset;
   /* Address is shifted right by bo_shift then left by bo_low before use */
   uint32_t bo_shift;
   uint32_t bo_low;
};
502
503 #define fd_reg_pair tu_reg_value
504 #define __bo_type struct tu_bo *
505
506 #include "a6xx-pack.xml.h"
507 #include "adreno-pm4-pack.xml.h"
508
/* Like assert((a) == (b)) but prints both values before tripping the
 * assert, to make out-of-order register writes easy to diagnose.
 */
#define __assert_eq(a, b)                                                    \
   do {                                                                      \
      if ((a) != (b)) {                                                      \
         fprintf(stderr, "assert failed: " #a " (0x%x) != " #b " (0x%x)\n", a, b); \
         assert((a) == (b));                                                 \
      }                                                                      \
   } while (0)
516
/* Emit the i-th register of a tu_cs_emit_regs() pack into *p. Verifies the
 * register offsets are consecutive (regs[0].reg + i). BO-relative values
 * are written as 64 bits: (iova + offset) >> bo_shift << bo_low | value.
 * Entries past the end of `regs`, or with reg == 0, are skipped.
 */
#define __ONE_REG(i, regs)                                                   \
   do {                                                                      \
      if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) {                         \
         __assert_eq(regs[0].reg + i, regs[i].reg);                          \
         if (regs[i].bo) {                                                   \
            uint64_t v = regs[i].bo->iova + regs[i].bo_offset;               \
            v >>= regs[i].bo_shift;                                          \
            v <<= regs[i].bo_low;                                            \
            v |= regs[i].value;                                              \
                                                                             \
            *p++ = v;                                                        \
            *p++ = v >> 32;                                                  \
         } else {                                                            \
            *p++ = regs[i].value;                                            \
            if (regs[i].is_address)                                          \
               *p++ = regs[i].value >> 32;                                   \
         }                                                                   \
      }                                                                      \
   } while (0)
536
/* Emits a sequence of register writes in order using a pkt4. This will check
 * (at runtime on a !NDEBUG build) that the registers were actually set up in
 * order in the code.
 *
 * Note that references to buffers aren't automatically added to the CS,
 * unlike in freedreno. We are clever in various places to avoid duplicating
 * the reference add work.
 *
 * Also, 64-bit address registers don't have a way (currently) to set a 64-bit
 * address without having a reference to a BO, since the .dword field in the
 * register's struct is only 32-bit wide. We should fix this in the pack
 * codegen later.
 *
 * Up to 16 registers per invocation (enforced by the STATIC_ASSERTs); the
 * unrolled __ONE_REG calls skip indices beyond ARRAY_SIZE(regs).
 */
#define tu_cs_emit_regs(cs, ...) do {                     \
   const struct fd_reg_pair regs[] = { __VA_ARGS__ };     \
   unsigned count = ARRAY_SIZE(regs);                     \
                                                          \
   STATIC_ASSERT(ARRAY_SIZE(regs) > 0);                   \
   STATIC_ASSERT(ARRAY_SIZE(regs) <= 16);                 \
                                                          \
   tu_cs_emit_pkt4((cs), regs[0].reg, count);             \
   uint32_t *p = (cs)->cur;                               \
   __ONE_REG( 0, regs);                                   \
   __ONE_REG( 1, regs);                                   \
   __ONE_REG( 2, regs);                                   \
   __ONE_REG( 3, regs);                                   \
   __ONE_REG( 4, regs);                                   \
   __ONE_REG( 5, regs);                                   \
   __ONE_REG( 6, regs);                                   \
   __ONE_REG( 7, regs);                                   \
   __ONE_REG( 8, regs);                                   \
   __ONE_REG( 9, regs);                                   \
   __ONE_REG(10, regs);                                   \
   __ONE_REG(11, regs);                                   \
   __ONE_REG(12, regs);                                   \
   __ONE_REG(13, regs);                                   \
   __ONE_REG(14, regs);                                   \
   __ONE_REG(15, regs);                                   \
   (cs)->cur = p;                                         \
} while (0)
577
578 #endif /* TU_CS_H */
579