• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2019 Google LLC
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #ifndef TU_CS_H
7 #define TU_CS_H
8 
9 #include "tu_common.h"
10 
11 #include "freedreno_pm4.h"
12 
13 #include "tu_drm.h"
14 
15 /* For breadcrumbs we may open a network socket based on the envvar,
16  * it's not something that should be enabled by default.
17  */
18 #define TU_BREADCRUMBS_ENABLED 0
19 
/**
 * Modes controlling how a command stream manages its backing storage and
 * its entries.
 */
enum tu_cs_mode
{

   /*
    * A command stream in TU_CS_MODE_GROW mode grows automatically whenever it
    * is full.  tu_cs_begin must be called before command packet emission and
    * tu_cs_end must be called after.
    *
    * This mode may create multiple entries internally.  The entries must be
    * submitted together.
    */
   TU_CS_MODE_GROW,

   /*
    * A command stream in TU_CS_MODE_EXTERNAL mode wraps an external,
    * fixed-size buffer.  tu_cs_begin and tu_cs_end are optional and have no
    * effect on it.
    *
    * This mode does not create any entry or any BO.
    */
   TU_CS_MODE_EXTERNAL,

   /*
    * A command stream in TU_CS_MODE_SUB_STREAM mode does not support direct
    * command packet emission.  tu_cs_begin_sub_stream must be called to get a
    * sub-stream to emit command packets to.  When done with the sub-stream,
    * tu_cs_end_sub_stream must be called.
    *
    * This mode does not create any entry internally.
    */
   TU_CS_MODE_SUB_STREAM,
};
52 
/**
 * A contiguous range of emitted command packets inside a BO, ready to be
 * referenced by a CP_INDIRECT_BUFFER packet (see tu_cs_emit_ib).
 */
struct tu_cs_entry
{
   /* No ownership */
   const struct tu_bo *bo;

   uint32_t size;   /* size of the range in bytes (dword-aligned) */
   uint32_t offset; /* byte offset of the range within the BO */
};
61 
/**
 * A piece of command-stream memory as returned by tu_cs_alloc: the CPU
 * mapping and the GPU virtual address of the same storage.
 */
struct tu_cs_memory {
   uint32_t *map;  /* CPU-visible write pointer */
   uint64_t iova;  /* GPU virtual address of the same memory */
};
66 
/**
 * Compact (single qword) reference to a block of state commands: a 48-bit
 * GPU address plus a size in dwords.
 */
struct tu_draw_state {
   uint64_t iova : 48; /* GPU address of the commands */
   uint32_t size : 16; /* size in dwords (see tu_cs_end_draw_state) */
};
71 
/* Maximum nesting depth of tu_cond_exec_start/tu_cond_exec_end pairs. */
#define TU_COND_EXEC_STACK_SIZE 4

/**
 * A command stream.  Emission writes dwords at `cur`; the invariant
 * start <= cur <= reserved_end <= end is checked by tu_cs_sanity_check.
 */
struct tu_cs
{
   uint32_t *start;        /* beginning of the current packet run */
   uint32_t *cur;          /* write pointer for the next dword */
   uint32_t *reserved_end; /* end of space reserved via tu_cs_reserve */
   uint32_t *end;          /* end of the current buffer */

   struct tu_device *device;
   enum tu_cs_mode mode;
   /* NOTE(review): presumably the allocation size for the next BO in
    * TU_CS_MODE_GROW -- confirm against tu_cs.c */
   uint32_t next_bo_size;

   /* entries accumulated so far (TU_CS_MODE_GROW only) */
   struct tu_cs_entry *entries;
   uint32_t entry_count;
   uint32_t entry_capacity;

   /* BOs backing the command stream */
   struct tu_bo **bos;
   uint32_t bo_count;
   uint32_t bo_capacity;

   /* Optional BO that this CS is sub-allocated from for TU_CS_MODE_SUB_STREAM */
   struct tu_bo *refcount_bo;

   /* state for cond_exec_start/cond_exec_end; cond_dwords[] points at the
    * DWORDS field of each open CP_COND_REG_EXEC packet so it can be patched */
   uint32_t cond_stack_depth;
   uint32_t cond_flags[TU_COND_EXEC_STACK_SIZE];
   uint32_t *cond_dwords[TU_COND_EXEC_STACK_SIZE];

   /* countdown of dwords until a sync breadcrumb is emitted; only used when
    * TU_BREADCRUMBS_ENABLED is set (see tu_cs_emit) */
   uint32_t breadcrumb_emit_after;
};
103 
void
tu_breadcrumbs_init(struct tu_device *device);

void
tu_breadcrumbs_finish(struct tu_device *device);

/* Initialize a command stream in the given mode.  NOTE(review): initial_size
 * presumably sizes the first BO allocation -- confirm against tu_cs.c. */
void
tu_cs_init(struct tu_cs *cs,
           struct tu_device *device,
           enum tu_cs_mode mode,
           uint32_t initial_size);

/* Initialize a command stream wrapping the external buffer [start, end). */
void
tu_cs_init_external(struct tu_cs *cs, struct tu_device *device,
                    uint32_t *start, uint32_t *end);

/* Initialize a command stream sub-allocated from an existing suballoc BO. */
void
tu_cs_init_suballoc(struct tu_cs *cs, struct tu_device *device,
                    struct tu_suballoc_bo *bo);

/* Tear down the command stream and the resources it owns. */
void
tu_cs_finish(struct tu_cs *cs);

/* Begin command packet emission (required in TU_CS_MODE_GROW). */
void
tu_cs_begin(struct tu_cs *cs);

/* End command packet emission (required in TU_CS_MODE_GROW). */
void
tu_cs_end(struct tu_cs *cs);

/* Get a sub-stream to emit packets to; pair with tu_cs_end_sub_stream
 * (TU_CS_MODE_SUB_STREAM only). */
VkResult
tu_cs_begin_sub_stream(struct tu_cs *cs, uint32_t size, struct tu_cs *sub_cs);

/* Allocate command-stream memory and return its mapping and GPU address in
 * `memory`.  NOTE(review): from the tu_cs_draw_state call site this appears
 * to allocate count * size dwords -- confirm against tu_cs.c. */
VkResult
tu_cs_alloc(struct tu_cs *cs,
            uint32_t count,
            uint32_t size,
            struct tu_cs_memory *memory);

/* Finish a sub-stream and return the entry describing its packets. */
struct tu_cs_entry
tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs);
144 
145 static inline struct tu_draw_state
tu_cs_end_draw_state(struct tu_cs * cs,struct tu_cs * sub_cs)146 tu_cs_end_draw_state(struct tu_cs *cs, struct tu_cs *sub_cs)
147 {
148    struct tu_cs_entry entry = tu_cs_end_sub_stream(cs, sub_cs);
149    return (struct tu_draw_state) {
150       .iova = entry.bo->iova + entry.offset,
151       .size = entry.size / sizeof(uint32_t),
152    };
153 }
154 
/* Guarantee room for reserved_size dwords of emission.  NOTE(review): in
 * TU_CS_MODE_GROW this presumably allocates a new BO / entry slot when the
 * current one is exhausted (see tu_cs_reserve) -- confirm in tu_cs.c. */
VkResult
tu_cs_reserve_space(struct tu_cs *cs, uint32_t reserved_size);
157 
/**
 * Allocate \a size dwords from \a sub_cs, set \a cs up as an external stream
 * wrapping exactly that memory, and return the matching draw state.
 *
 * NOTE(review): the VkResult of tu_cs_alloc (and tu_cs_reserve_space) is
 * ignored here; on allocation failure the returned state would reference
 * invalid memory.  Related to the existing TODO below.
 */
static inline struct tu_draw_state
tu_cs_draw_state(struct tu_cs *sub_cs, struct tu_cs *cs, uint32_t size)
{
   struct tu_cs_memory memory;

   /* TODO: clean this up */
   tu_cs_alloc(sub_cs, size, 1, &memory);
   tu_cs_init_external(cs, sub_cs->device, memory.map, memory.map + size);
   tu_cs_begin(cs);
   tu_cs_reserve_space(cs, size);

   return (struct tu_draw_state) {
      .iova = memory.iova,
      .size = size,
   };
}
174 
/* Reset the command stream to empty for reuse.  NOTE(review): whether BOs
 * are recycled or freed is decided in tu_cs.c -- confirm there. */
void
tu_cs_reset(struct tu_cs *cs);

/* Append the entries of `target` (a TU_CS_MODE_GROW stream) to `cs`. */
VkResult
tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target);
180 
181 /**
182  * Get the size of the command packets emitted since the last call to
183  * tu_cs_add_entry.
184  */
185 static inline uint32_t
tu_cs_get_size(const struct tu_cs * cs)186 tu_cs_get_size(const struct tu_cs *cs)
187 {
188    return cs->cur - cs->start;
189 }
190 
191 /**
192  * Return true if there is no command packet emitted since the last call to
193  * tu_cs_add_entry.
194  */
195 static inline uint32_t
tu_cs_is_empty(const struct tu_cs * cs)196 tu_cs_is_empty(const struct tu_cs *cs)
197 {
198    return tu_cs_get_size(cs) == 0;
199 }
200 
/**
 * Discard all entries.  This allows \a cs to be reused while keeping the
 * existing BOs and command packets intact.
 */
static inline void
tu_cs_discard_entries(struct tu_cs *cs)
{
   /* only TU_CS_MODE_GROW creates entries */
   assert(cs->mode == TU_CS_MODE_GROW);
   cs->entry_count = 0;
}
211 
/**
 * Get the size (in dwords) needed for tu_cs_emit_call.
 */
static inline uint32_t
tu_cs_get_call_size(const struct tu_cs *cs)
{
   assert(cs->mode == TU_CS_MODE_GROW);
   /* each CP_INDIRECT_BUFFER needs 4 dwords: pkt7 header, 64-bit address
    * (2 dwords) and dword count -- see tu_cs_emit_ib */
   return cs->entry_count * 4;
}
222 
/**
 * Assert that the stream pointers are consistently ordered, i.e. that
 * emission did not exceed the reserved space and the reservation did not
 * exceed the buffer.
 */
static inline void
tu_cs_sanity_check(const struct tu_cs *cs)
{
   assert(cs->start <= cs->cur);
   assert(cs->cur <= cs->reserved_end);
   assert(cs->reserved_end <= cs->end);
}
233 
/* Emit a synchronization breadcrumb; only called when TU_BREADCRUMBS_ENABLED
 * is set (see tu_cs_emit and tu_cs_emit_pkt7). */
void
tu_cs_emit_sync_breadcrumb(struct tu_cs *cs, uint8_t opcode, uint16_t cnt);
236 
237 /**
238  * Emit a uint32_t value into a command stream, without boundary checking.
239  */
240 static inline void
tu_cs_emit(struct tu_cs * cs,uint32_t value)241 tu_cs_emit(struct tu_cs *cs, uint32_t value)
242 {
243    assert(cs->cur < cs->reserved_end);
244    *cs->cur = value;
245    ++cs->cur;
246 
247 #if TU_BREADCRUMBS_ENABLED
248    cs->breadcrumb_emit_after--;
249    if (cs->breadcrumb_emit_after == 0)
250       tu_cs_emit_sync_breadcrumb(cs, -1, 0);
251 #endif
252 }
253 
254 /**
255  * Emit an array of uint32_t into a command stream, without boundary checking.
256  */
257 static inline void
tu_cs_emit_array(struct tu_cs * cs,const uint32_t * values,uint32_t length)258 tu_cs_emit_array(struct tu_cs *cs, const uint32_t *values, uint32_t length)
259 {
260    assert(cs->cur + length <= cs->reserved_end);
261    memcpy(cs->cur, values, sizeof(uint32_t) * length);
262    cs->cur += length;
263 }
264 
265 /**
266  * Get the size of the remaining space in the current BO.
267  */
268 static inline uint32_t
tu_cs_get_space(const struct tu_cs * cs)269 tu_cs_get_space(const struct tu_cs *cs)
270 {
271    return cs->end - cs->cur;
272 }
273 
274 static inline void
tu_cs_reserve(struct tu_cs * cs,uint32_t reserved_size)275 tu_cs_reserve(struct tu_cs *cs, uint32_t reserved_size)
276 {
277    if (cs->mode != TU_CS_MODE_GROW) {
278       assert(tu_cs_get_space(cs) >= reserved_size);
279       assert(cs->reserved_end == cs->end);
280       return;
281    }
282 
283    if (tu_cs_get_space(cs) >= reserved_size &&
284        cs->entry_count < cs->entry_capacity) {
285       cs->reserved_end = cs->cur + reserved_size;
286       return;
287    }
288 
289    ASSERTED VkResult result = tu_cs_reserve_space(cs, reserved_size);
290    /* TODO: set this error in tu_cs and use it */
291    assert(result == VK_SUCCESS);
292 }
293 
/**
 * Emit a type-4 (register write) command packet header into a command
 * stream, reserving room for the header and its \a cnt payload dwords.
 */
static inline void
tu_cs_emit_pkt4(struct tu_cs *cs, uint16_t regindx, uint16_t cnt)
{
   const uint32_t total = cnt + 1; /* header + payload */

   tu_cs_reserve(cs, total);
   tu_cs_emit(cs, pm4_pkt4_hdr(regindx, cnt));
}
303 
/**
 * Emit a type-7 (opcode) command packet header into a command stream,
 * reserving room for the header and its \a cnt payload dwords.
 */
static inline void
tu_cs_emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
{
   const uint32_t total = cnt + 1; /* header + payload */

#if TU_BREADCRUMBS_ENABLED
   tu_cs_emit_sync_breadcrumb(cs, opcode, total);
#endif

   tu_cs_reserve(cs, total);
   tu_cs_emit(cs, pm4_pkt7_hdr(opcode, cnt));
}
317 
/* Emit a CP_WAIT_FOR_IDLE packet (no payload). */
static inline void
tu_cs_emit_wfi(struct tu_cs *cs)
{
   tu_cs_emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
}
323 
/* Emit a 64-bit value as two dwords, low half first. */
static inline void
tu_cs_emit_qw(struct tu_cs *cs, uint64_t value)
{
   const uint32_t lo = (uint32_t) value;
   const uint32_t hi = (uint32_t) (value >> 32);

   tu_cs_emit(cs, lo);
   tu_cs_emit(cs, hi);
}
330 
/* Write a single 32-bit value to register \a reg via a type-4 packet. */
static inline void
tu_cs_emit_write_reg(struct tu_cs *cs, uint16_t reg, uint32_t value)
{
   tu_cs_emit_pkt4(cs, reg, 1);
   tu_cs_emit(cs, value);
}
337 
338 /**
339  * Emit a CP_INDIRECT_BUFFER command packet.
340  */
341 static inline void
tu_cs_emit_ib(struct tu_cs * cs,const struct tu_cs_entry * entry)342 tu_cs_emit_ib(struct tu_cs *cs, const struct tu_cs_entry *entry)
343 {
344    assert(entry->bo);
345    assert(entry->size && entry->offset + entry->size <= entry->bo->size);
346    assert(entry->size % sizeof(uint32_t) == 0);
347    assert(entry->offset % sizeof(uint32_t) == 0);
348 
349    tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3);
350    tu_cs_emit_qw(cs, entry->bo->iova + entry->offset);
351    tu_cs_emit(cs, entry->size / sizeof(uint32_t));
352 }
353 
354 /* for compute which isn't using SET_DRAW_STATE */
355 static inline void
tu_cs_emit_state_ib(struct tu_cs * cs,struct tu_draw_state state)356 tu_cs_emit_state_ib(struct tu_cs *cs, struct tu_draw_state state)
357 {
358    if (state.size) {
359       tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3);
360       tu_cs_emit_qw(cs, state.iova);
361       tu_cs_emit(cs, state.size);
362    }
363 }
364 
365 /**
366  * Emit a CP_INDIRECT_BUFFER command packet for each entry in the target
367  * command stream.
368  */
369 static inline void
tu_cs_emit_call(struct tu_cs * cs,const struct tu_cs * target)370 tu_cs_emit_call(struct tu_cs *cs, const struct tu_cs *target)
371 {
372    assert(target->mode == TU_CS_MODE_GROW);
373    for (uint32_t i = 0; i < target->entry_count; i++)
374       tu_cs_emit_ib(cs, target->entries + i);
375 }
376 
/* Helpers for bracketing a large sequence of commands of unknown size inside
 * a CP_COND_REG_EXEC packet.
 */
static inline void
tu_cond_exec_start(struct tu_cs *cs, uint32_t cond_flags)
{
   assert(cs->mode == TU_CS_MODE_GROW);
   assert(cs->cond_stack_depth < TU_COND_EXEC_STACK_SIZE);

   tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
   tu_cs_emit(cs, cond_flags);

   /* Remember where the DWORDS field lives so tu_cond_exec_end can patch in
    * the real body size once it is known. */
   cs->cond_flags[cs->cond_stack_depth] = cond_flags;
   cs->cond_dwords[cs->cond_stack_depth] = cs->cur;

   /* Emit dummy DWORD field here */
   tu_cs_emit(cs, CP_COND_REG_EXEC_1_DWORDS(0));

   cs->cond_stack_depth++;
}
/* Common cond_flags for bracketing GMEM- or sysmem-only rendering commands. */
#define CP_COND_EXEC_0_RENDER_MODE_GMEM \
   (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_GMEM)
#define CP_COND_EXEC_0_RENDER_MODE_SYSMEM \
   (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_SYSMEM)

/* Close the innermost tu_cond_exec_start bracket by patching the packet's
 * DWORDS placeholder with the number of dwords emitted since then. */
static inline void
tu_cond_exec_end(struct tu_cs *cs)
{
   assert(cs->cond_stack_depth > 0);
   cs->cond_stack_depth--;

   cs->cond_flags[cs->cond_stack_depth] = 0;
   /* Subtract one here to account for the DWORD field itself. */
   *cs->cond_dwords[cs->cond_stack_depth] =
      cs->cur - cs->cond_dwords[cs->cond_stack_depth] - 1;
}
413 
/* Temporary struct for tracking a register state to be written, used by
 * a6xx-pack.h and tu_cs_emit_regs()
 */
struct tu_reg_value {
   uint32_t reg;      /* register offset; list must be contiguous from regs[0] */
   uint64_t value;    /* immediate value, or bits OR'ed into the BO address
                       * (see __ONE_REG) */
   bool is_address;   /* value is 64-bit: emit a second (high) dword */
   struct tu_bo *bo;  /* if set, emit (bo->iova + bo_offset) >> bo_shift */
   /* NOTE(review): bo_write is not read anywhere in this header; presumably
    * marks the BO as GPU-written for residency tracking -- confirm users */
   bool bo_write;
   uint32_t bo_offset;
   uint32_t bo_shift;
};

/* Map the generated pack helpers in a6xx-pack.xml.h onto turnip's types. */
#define fd_reg_pair tu_reg_value
#define __bo_type struct tu_bo *

#include "a6xx-pack.xml.h"
431 
/* Like assert((a) == (b)), but prints both values first so register-ordering
 * mistakes in tu_cs_emit_regs are easy to diagnose.  Note: evaluates its
 * arguments more than once. */
#define __assert_eq(a, b)                                               \
   do {                                                                 \
      if ((a) != (b)) {                                                 \
         fprintf(stderr, "assert failed: " #a " (0x%x) != " #b " (0x%x)\n", a, b); \
         assert((a) == (b));                                            \
      }                                                                 \
   } while (0)
439 
/* Emit register i of a tu_cs_emit_regs() list through the local pointer `p`.
 * Skips indices past the end of the list or with reg == 0; asserts (in debug
 * builds) that the list is contiguous, i.e. regs[i].reg == regs[0].reg + i.
 * BO-relative entries emit the shifted 64-bit address OR'ed with .value;
 * plain entries emit one dword, or two when .is_address is set. */
#define __ONE_REG(i, regs)                                      \
   do {                                                         \
      if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) {            \
         __assert_eq(regs[0].reg + i, regs[i].reg);             \
         if (regs[i].bo) {                                      \
            uint64_t v = regs[i].bo->iova + regs[i].bo_offset;  \
            v >>= regs[i].bo_shift;                             \
            v |= regs[i].value;                                 \
                                                                \
            *p++ = v;                                           \
            *p++ = v >> 32;                                     \
         } else {                                               \
            *p++ = regs[i].value;                               \
            if (regs[i].is_address)                             \
               *p++ = regs[i].value >> 32;                      \
         }                                                      \
      }                                                         \
   } while (0)
458 
/* Emits a sequence of register writes in order using a pkt4.  This will check
 * (at runtime on a !NDEBUG build) that the registers were actually set up in
 * order in the code.
 *
 * Note that references to buffers aren't automatically added to the CS,
 * unlike in freedreno.  We are clever in various places to avoid duplicating
 * the reference add work.
 *
 * Also, 64-bit address registers don't have a way (currently) to set a 64-bit
 * address without having a reference to a BO, since the .dword field in the
 * register's struct is only 32-bit wide.  We should fix this in the pack
 * codegen later.
 *
 * At most 16 consecutive registers may be emitted per call (enforced by the
 * STATIC_ASSERTs below, matching the 16 __ONE_REG expansions).
 */
#define tu_cs_emit_regs(cs, ...) do {                   \
   const struct fd_reg_pair regs[] = { __VA_ARGS__ };   \
   unsigned count = ARRAY_SIZE(regs);                   \
                                                        \
   STATIC_ASSERT(ARRAY_SIZE(regs) > 0);                 \
   STATIC_ASSERT(ARRAY_SIZE(regs) <= 16);               \
                                                        \
   tu_cs_emit_pkt4((cs), regs[0].reg, count);             \
   uint32_t *p = (cs)->cur;                               \
   __ONE_REG( 0, regs);                                 \
   __ONE_REG( 1, regs);                                 \
   __ONE_REG( 2, regs);                                 \
   __ONE_REG( 3, regs);                                 \
   __ONE_REG( 4, regs);                                 \
   __ONE_REG( 5, regs);                                 \
   __ONE_REG( 6, regs);                                 \
   __ONE_REG( 7, regs);                                 \
   __ONE_REG( 8, regs);                                 \
   __ONE_REG( 9, regs);                                 \
   __ONE_REG(10, regs);                                 \
   __ONE_REG(11, regs);                                 \
   __ONE_REG(12, regs);                                 \
   __ONE_REG(13, regs);                                 \
   __ONE_REG(14, regs);                                 \
   __ONE_REG(15, regs);                                 \
   (cs)->cur = p;                                         \
   } while (0)
499 
500 #endif /* TU_CS_H */
501