/* * Copyright © 2019 Google LLC * SPDX-License-Identifier: MIT */ #ifndef TU_CS_H #define TU_CS_H #include "tu_common.h" #include "freedreno_pm4.h" #include "tu_drm.h" /* For breadcrumbs we may open a network socket based on the envvar, * it's not something that should be enabled by default. */ #define TU_BREADCRUMBS_ENABLED 0 enum tu_cs_mode { /* * A command stream in TU_CS_MODE_GROW mode grows automatically whenever it * is full. tu_cs_begin must be called before command packet emission and * tu_cs_end must be called after. * * This mode may create multiple entries internally. The entries must be * submitted together. */ TU_CS_MODE_GROW, /* * A command stream in TU_CS_MODE_EXTERNAL mode wraps an external, * fixed-size buffer. tu_cs_begin and tu_cs_end are optional and have no * effect on it. * * This mode does not create any entry or any BO. */ TU_CS_MODE_EXTERNAL, /* * A command stream in TU_CS_MODE_SUB_STREAM mode does not support direct * command packet emission. tu_cs_begin_sub_stream must be called to get a * sub-stream to emit comamnd packets to. When done with the sub-stream, * tu_cs_end_sub_stream must be called. * * This mode does not create any entry internally. */ TU_CS_MODE_SUB_STREAM, }; struct tu_cs_entry { /* No ownership */ const struct tu_bo *bo; uint32_t size; uint32_t offset; }; struct tu_cs_memory { uint32_t *map; uint64_t iova; }; struct tu_draw_state { uint64_t iova : 48; uint32_t size : 16; }; #define TU_COND_EXEC_STACK_SIZE 4 struct tu_cs { uint32_t *start; uint32_t *cur; uint32_t *reserved_end; uint32_t *end; struct tu_device *device; enum tu_cs_mode mode; uint32_t next_bo_size; struct tu_cs_entry *entries; uint32_t entry_count; uint32_t entry_capacity; struct tu_bo **bos; uint32_t bo_count; uint32_t bo_capacity; /* Optional BO that this CS is sub-allocated from for TU_CS_MODE_SUB_STREAM */ struct tu_bo *refcount_bo; /* state for cond_exec_start/cond_exec_end */ uint32_t cond_stack_depth; uint32_t cond_flags[TU_COND_EXEC_STACK_SIZE]; uint32_t *cond_dwords[TU_COND_EXEC_STACK_SIZE]; uint32_t breadcrumb_emit_after; }; void tu_breadcrumbs_init(struct tu_device *device); void tu_breadcrumbs_finish(struct tu_device *device); void tu_cs_init(struct tu_cs *cs, struct tu_device *device, enum tu_cs_mode mode, uint32_t initial_size); void tu_cs_init_external(struct tu_cs *cs, struct tu_device *device, uint32_t *start, uint32_t *end); void tu_cs_init_suballoc(struct tu_cs *cs, struct tu_device *device, struct tu_suballoc_bo *bo); void tu_cs_finish(struct tu_cs *cs); void tu_cs_begin(struct tu_cs *cs); void tu_cs_end(struct tu_cs *cs); VkResult tu_cs_begin_sub_stream(struct tu_cs *cs, uint32_t size, struct tu_cs *sub_cs); VkResult tu_cs_alloc(struct tu_cs *cs, uint32_t count, uint32_t size, struct tu_cs_memory *memory); struct tu_cs_entry tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs); static inline struct tu_draw_state tu_cs_end_draw_state(struct tu_cs *cs, struct tu_cs *sub_cs) { struct tu_cs_entry entry = tu_cs_end_sub_stream(cs, sub_cs); return (struct tu_draw_state) { .iova = entry.bo->iova + entry.offset, .size = entry.size / sizeof(uint32_t), }; } VkResult tu_cs_reserve_space(struct tu_cs *cs, uint32_t reserved_size); static inline struct tu_draw_state tu_cs_draw_state(struct tu_cs *sub_cs, struct tu_cs *cs, uint32_t size) { struct tu_cs_memory memory; /* TODO: clean this up */ tu_cs_alloc(sub_cs, size, 1, &memory); tu_cs_init_external(cs, sub_cs->device, memory.map, memory.map + size); tu_cs_begin(cs); tu_cs_reserve_space(cs, size); return (struct tu_draw_state) { .iova = memory.iova, .size = size, }; } void tu_cs_reset(struct tu_cs *cs); VkResult tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target); /** * Get the size of the command packets emitted since the last call to * tu_cs_add_entry. */ static inline uint32_t tu_cs_get_size(const struct tu_cs *cs) { return cs->cur - cs->start; } /** * Return true if there is no command packet emitted since the last call to * tu_cs_add_entry. */ static inline uint32_t tu_cs_is_empty(const struct tu_cs *cs) { return tu_cs_get_size(cs) == 0; } /** * Discard all entries. This allows \a cs to be reused while keeping the * existing BOs and command packets intact. */ static inline void tu_cs_discard_entries(struct tu_cs *cs) { assert(cs->mode == TU_CS_MODE_GROW); cs->entry_count = 0; } /** * Get the size needed for tu_cs_emit_call. */ static inline uint32_t tu_cs_get_call_size(const struct tu_cs *cs) { assert(cs->mode == TU_CS_MODE_GROW); /* each CP_INDIRECT_BUFFER needs 4 dwords */ return cs->entry_count * 4; } /** * Assert that we did not exceed the reserved space. */ static inline void tu_cs_sanity_check(const struct tu_cs *cs) { assert(cs->start <= cs->cur); assert(cs->cur <= cs->reserved_end); assert(cs->reserved_end <= cs->end); } void tu_cs_emit_sync_breadcrumb(struct tu_cs *cs, uint8_t opcode, uint16_t cnt); /** * Emit a uint32_t value into a command stream, without boundary checking. */ static inline void tu_cs_emit(struct tu_cs *cs, uint32_t value) { assert(cs->cur < cs->reserved_end); *cs->cur = value; ++cs->cur; #if TU_BREADCRUMBS_ENABLED cs->breadcrumb_emit_after--; if (cs->breadcrumb_emit_after == 0) tu_cs_emit_sync_breadcrumb(cs, -1, 0); #endif } /** * Emit an array of uint32_t into a command stream, without boundary checking. */ static inline void tu_cs_emit_array(struct tu_cs *cs, const uint32_t *values, uint32_t length) { assert(cs->cur + length <= cs->reserved_end); memcpy(cs->cur, values, sizeof(uint32_t) * length); cs->cur += length; } /** * Get the size of the remaining space in the current BO. */ static inline uint32_t tu_cs_get_space(const struct tu_cs *cs) { return cs->end - cs->cur; } static inline void tu_cs_reserve(struct tu_cs *cs, uint32_t reserved_size) { if (cs->mode != TU_CS_MODE_GROW) { assert(tu_cs_get_space(cs) >= reserved_size); assert(cs->reserved_end == cs->end); return; } if (tu_cs_get_space(cs) >= reserved_size && cs->entry_count < cs->entry_capacity) { cs->reserved_end = cs->cur + reserved_size; return; } ASSERTED VkResult result = tu_cs_reserve_space(cs, reserved_size); /* TODO: set this error in tu_cs and use it */ assert(result == VK_SUCCESS); } /** * Emit a type-4 command packet header into a command stream. */ static inline void tu_cs_emit_pkt4(struct tu_cs *cs, uint16_t regindx, uint16_t cnt) { tu_cs_reserve(cs, cnt + 1); tu_cs_emit(cs, pm4_pkt4_hdr(regindx, cnt)); } /** * Emit a type-7 command packet header into a command stream. */ static inline void tu_cs_emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt) { #if TU_BREADCRUMBS_ENABLED tu_cs_emit_sync_breadcrumb(cs, opcode, cnt + 1); #endif tu_cs_reserve(cs, cnt + 1); tu_cs_emit(cs, pm4_pkt7_hdr(opcode, cnt)); } static inline void tu_cs_emit_wfi(struct tu_cs *cs) { tu_cs_emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0); } static inline void tu_cs_emit_qw(struct tu_cs *cs, uint64_t value) { tu_cs_emit(cs, (uint32_t) value); tu_cs_emit(cs, (uint32_t) (value >> 32)); } static inline void tu_cs_emit_write_reg(struct tu_cs *cs, uint16_t reg, uint32_t value) { tu_cs_emit_pkt4(cs, reg, 1); tu_cs_emit(cs, value); } /** * Emit a CP_INDIRECT_BUFFER command packet. */ static inline void tu_cs_emit_ib(struct tu_cs *cs, const struct tu_cs_entry *entry) { assert(entry->bo); assert(entry->size && entry->offset + entry->size <= entry->bo->size); assert(entry->size % sizeof(uint32_t) == 0); assert(entry->offset % sizeof(uint32_t) == 0); tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3); tu_cs_emit_qw(cs, entry->bo->iova + entry->offset); tu_cs_emit(cs, entry->size / sizeof(uint32_t)); } /* for compute which isn't using SET_DRAW_STATE */ static inline void tu_cs_emit_state_ib(struct tu_cs *cs, struct tu_draw_state state) { if (state.size) { tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3); tu_cs_emit_qw(cs, state.iova); tu_cs_emit(cs, state.size); } } /** * Emit a CP_INDIRECT_BUFFER command packet for each entry in the target * command stream. */ static inline void tu_cs_emit_call(struct tu_cs *cs, const struct tu_cs *target) { assert(target->mode == TU_CS_MODE_GROW); for (uint32_t i = 0; i < target->entry_count; i++) tu_cs_emit_ib(cs, target->entries + i); } /* Helpers for bracketing a large sequence of commands of unknown size inside * a CP_COND_REG_EXEC packet. */ static inline void tu_cond_exec_start(struct tu_cs *cs, uint32_t cond_flags) { assert(cs->mode == TU_CS_MODE_GROW); assert(cs->cond_stack_depth < TU_COND_EXEC_STACK_SIZE); tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2); tu_cs_emit(cs, cond_flags); cs->cond_flags[cs->cond_stack_depth] = cond_flags; cs->cond_dwords[cs->cond_stack_depth] = cs->cur; /* Emit dummy DWORD field here */ tu_cs_emit(cs, CP_COND_REG_EXEC_1_DWORDS(0)); cs->cond_stack_depth++; } #define CP_COND_EXEC_0_RENDER_MODE_GMEM \ (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_GMEM) #define CP_COND_EXEC_0_RENDER_MODE_SYSMEM \ (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_SYSMEM) static inline void tu_cond_exec_end(struct tu_cs *cs) { assert(cs->cond_stack_depth > 0); cs->cond_stack_depth--; cs->cond_flags[cs->cond_stack_depth] = 0; /* Subtract one here to account for the DWORD field itself. */ *cs->cond_dwords[cs->cond_stack_depth] = cs->cur - cs->cond_dwords[cs->cond_stack_depth] - 1; } /* Temporary struct for tracking a register state to be written, used by * a6xx-pack.h and tu_cs_emit_regs() */ struct tu_reg_value { uint32_t reg; uint64_t value; bool is_address; struct tu_bo *bo; bool bo_write; uint32_t bo_offset; uint32_t bo_shift; }; #define fd_reg_pair tu_reg_value #define __bo_type struct tu_bo * #include "a6xx-pack.xml.h" #define __assert_eq(a, b) \ do { \ if ((a) != (b)) { \ fprintf(stderr, "assert failed: " #a " (0x%x) != " #b " (0x%x)\n", a, b); \ assert((a) == (b)); \ } \ } while (0) #define __ONE_REG(i, regs) \ do { \ if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) { \ __assert_eq(regs[0].reg + i, regs[i].reg); \ if (regs[i].bo) { \ uint64_t v = regs[i].bo->iova + regs[i].bo_offset; \ v >>= regs[i].bo_shift; \ v |= regs[i].value; \ \ *p++ = v; \ *p++ = v >> 32; \ } else { \ *p++ = regs[i].value; \ if (regs[i].is_address) \ *p++ = regs[i].value >> 32; \ } \ } \ } while (0) /* Emits a sequence of register writes in order using a pkt4. This will check * (at runtime on a !NDEBUG build) that the registers were actually set up in * order in the code. * * Note that references to buffers aren't automatically added to the CS, * unlike in freedreno. We are clever in various places to avoid duplicating * the reference add work. * * Also, 64-bit address registers don't have a way (currently) to set a 64-bit * address without having a reference to a BO, since the .dword field in the * register's struct is only 32-bit wide. We should fix this in the pack * codegen later. */ #define tu_cs_emit_regs(cs, ...) do { \ const struct fd_reg_pair regs[] = { __VA_ARGS__ }; \ unsigned count = ARRAY_SIZE(regs); \ \ STATIC_ASSERT(ARRAY_SIZE(regs) > 0); \ STATIC_ASSERT(ARRAY_SIZE(regs) <= 16); \ \ tu_cs_emit_pkt4((cs), regs[0].reg, count); \ uint32_t *p = (cs)->cur; \ __ONE_REG( 0, regs); \ __ONE_REG( 1, regs); \ __ONE_REG( 2, regs); \ __ONE_REG( 3, regs); \ __ONE_REG( 4, regs); \ __ONE_REG( 5, regs); \ __ONE_REG( 6, regs); \ __ONE_REG( 7, regs); \ __ONE_REG( 8, regs); \ __ONE_REG( 9, regs); \ __ONE_REG(10, regs); \ __ONE_REG(11, regs); \ __ONE_REG(12, regs); \ __ONE_REG(13, regs); \ __ONE_REG(14, regs); \ __ONE_REG(15, regs); \ (cs)->cur = p; \ } while (0) #endif /* TU_CS_H */