/*
 * Copyright 2024 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "panvk_utrace.h"

#include "genxml/cs_builder.h"
#include "panvk_cmd_buffer.h"
#include "panvk_device.h"
#include "panvk_priv_bo.h"

13 static void
cmd_write_timestamp(struct cs_builder * b,uint64_t addr)14 cmd_write_timestamp(struct cs_builder *b, uint64_t addr)
15 {
16 const struct cs_index addr_reg = cs_scratch_reg64(b, 0);
17 /* abuse DEFERRED_SYNC */
18 const struct cs_async_op async = cs_defer(
19 SB_ALL_ITERS_MASK | SB_MASK(DEFERRED_FLUSH), SB_ID(DEFERRED_SYNC));
20
21 cs_move64_to(b, addr_reg, addr);
22 cs_store_state(b, addr_reg, 0, MALI_CS_STATE_TIMESTAMP, async);
23 }
24
25 static void
cmd_copy_data(struct cs_builder * b,uint64_t dst_addr,uint64_t src_addr,uint32_t size)26 cmd_copy_data(struct cs_builder *b, uint64_t dst_addr, uint64_t src_addr,
27 uint32_t size)
28 {
29 assert((dst_addr | src_addr | size) % sizeof(uint32_t) == 0);
30
31 /* wait for timestamp writes */
32 cs_wait_slot(b, SB_ID(DEFERRED_SYNC), false);
33
34 /* Depending on where this is called from, we could potentially use SR
35 * registers or copy with a compute job.
36 */
37 const struct cs_index dst_addr_reg = cs_scratch_reg64(b, 0);
38 const struct cs_index src_addr_reg = cs_scratch_reg64(b, 2);
39 const uint32_t temp_count = CS_REG_SCRATCH_COUNT - 4;
40
41 while (size) {
42 cs_move64_to(b, dst_addr_reg, dst_addr);
43 cs_move64_to(b, src_addr_reg, src_addr);
44
45 const uint32_t max_offset = 1 << 16;
46 uint32_t copy_count = MIN2(size, max_offset) / sizeof(uint32_t);
47 uint32_t offset = 0;
48 while (copy_count) {
49 const uint32_t count = MIN2(copy_count, temp_count);
50 const struct cs_index reg = cs_scratch_reg_tuple(b, 4, count);
51
52 cs_load_to(b, reg, src_addr_reg, BITFIELD_MASK(count), offset);
53 cs_wait_slot(b, SB_ID(LS), false);
54 cs_store(b, reg, dst_addr_reg, BITFIELD_MASK(count), offset);
55
56 copy_count -= count;
57 offset += count * sizeof(uint32_t);
58 }
59
60 dst_addr += offset;
61 src_addr += offset;
62 size -= offset;
63 }
64
65 cs_wait_slot(b, SB_ID(LS), false);
66 }
67
68 static struct cs_builder *
get_builder(struct panvk_cmd_buffer * cmdbuf,struct u_trace * ut)69 get_builder(struct panvk_cmd_buffer *cmdbuf, struct u_trace *ut)
70 {
71 const uint32_t subqueue = ut - cmdbuf->utrace.uts;
72 assert(subqueue < PANVK_SUBQUEUE_COUNT);
73
74 return panvk_get_cs_builder(cmdbuf, subqueue);
75 }
76
77 static void
panvk_utrace_record_ts(struct u_trace * ut,void * cs,void * timestamps,uint64_t offset_B,uint32_t flags)78 panvk_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
79 uint64_t offset_B, uint32_t flags)
80 {
81 struct cs_builder *b = get_builder(cs, ut);
82 const struct panvk_priv_bo *bo = timestamps;
83 const uint64_t addr = bo->addr.dev + offset_B;
84
85 cmd_write_timestamp(b, addr);
86 }
87
/*
 * Initialize the device-level u_trace context.  Each recorded timestamp
 * occupies a single uint64_t; no extra per-trace payload is requested.
 */
void
panvk_per_arch(utrace_context_init)(struct panvk_device *dev)
{
   u_trace_context_init(&dev->utrace.utctx, dev, sizeof(uint64_t), 0,
                        panvk_utrace_create_buffer, panvk_utrace_delete_buffer,
                        panvk_utrace_record_ts, panvk_utrace_read_ts, NULL,
                        NULL, panvk_utrace_delete_flush_data);
}
96
/* Tear down the device-level u_trace context. */
void
panvk_per_arch(utrace_context_fini)(struct panvk_device *dev)
{
   u_trace_context_fini(&dev->utrace.utctx);
}
102
103 void
panvk_per_arch(utrace_copy_buffer)104 panvk_per_arch(utrace_copy_buffer)(struct u_trace_context *utctx,
105 void *cmdstream, void *ts_from,
106 uint64_t from_offset, void *ts_to,
107 uint64_t to_offset, uint64_t size_B)
108 {
109 struct cs_builder *b = cmdstream;
110 const struct panvk_priv_bo *src_bo = ts_from;
111 const struct panvk_priv_bo *dst_bo = ts_to;
112 const uint64_t src_addr = src_bo->addr.dev + from_offset;
113 const uint64_t dst_addr = dst_bo->addr.dev + to_offset;
114
115 cmd_copy_data(b, dst_addr, src_addr, size_B);
116 }
117
118 void
panvk_per_arch(utrace_clone_init_pool)119 panvk_per_arch(utrace_clone_init_pool)(struct panvk_pool *pool,
120 struct panvk_device *dev)
121 {
122 const struct panvk_pool_properties pool_props = {
123 .slab_size = 64 * 1024,
124 .label = "utrace clone pool",
125 .owns_bos = true,
126 };
127 panvk_pool_init(pool, dev, NULL, &pool_props);
128 }
129
130 static struct cs_buffer
alloc_clone_buffer(void * cookie)131 alloc_clone_buffer(void *cookie)
132 {
133 struct panvk_pool *pool = cookie;
134 const uint32_t size = 4 * 1024;
135 const uint32_t alignment = 64;
136
137 struct panfrost_ptr ptr =
138 pan_pool_alloc_aligned(&pool->base, size, alignment);
139
140 return (struct cs_buffer){
141 .cpu = ptr.cpu,
142 .gpu = ptr.gpu,
143 .capacity = size,
144 };
145 }
146
147 void
panvk_per_arch(utrace_clone_init_builder)148 panvk_per_arch(utrace_clone_init_builder)(struct cs_builder *b,
149 struct panvk_pool *pool)
150 {
151 const struct cs_builder_conf builder_conf = {
152 .nr_registers = 96,
153 .nr_kernel_registers = 4,
154 .alloc_buffer = alloc_clone_buffer,
155 .cookie = pool,
156 };
157 cs_builder_init(b, &builder_conf, (struct cs_buffer){0});
158 }
159
160 void
panvk_per_arch(utrace_clone_finish_builder)161 panvk_per_arch(utrace_clone_finish_builder)(struct cs_builder *b)
162 {
163 const struct cs_index flush_id = cs_scratch_reg32(b, 0);
164
165 cs_move32_to(b, flush_id, 0);
166 cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_NONE, false,
167 flush_id, cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
168 cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
169
170 cs_finish(b);
171 }
172