• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright 2024 Google LLC
 * SPDX-License-Identifier: MIT
 */
5 
6 #include "panvk_utrace.h"
7 
8 #include "genxml/cs_builder.h"
9 #include "panvk_cmd_buffer.h"
10 #include "panvk_device.h"
11 #include "panvk_priv_bo.h"
12 
13 static void
cmd_write_timestamp(struct cs_builder * b,uint64_t addr)14 cmd_write_timestamp(struct cs_builder *b, uint64_t addr)
15 {
16    const struct cs_index addr_reg = cs_scratch_reg64(b, 0);
17    /* abuse DEFERRED_SYNC */
18    const struct cs_async_op async = cs_defer(
19       SB_ALL_ITERS_MASK | SB_MASK(DEFERRED_FLUSH), SB_ID(DEFERRED_SYNC));
20 
21    cs_move64_to(b, addr_reg, addr);
22    cs_store_state(b, addr_reg, 0, MALI_CS_STATE_TIMESTAMP, async);
23 }
24 
25 static void
cmd_copy_data(struct cs_builder * b,uint64_t dst_addr,uint64_t src_addr,uint32_t size)26 cmd_copy_data(struct cs_builder *b, uint64_t dst_addr, uint64_t src_addr,
27               uint32_t size)
28 {
29    assert((dst_addr | src_addr | size) % sizeof(uint32_t) == 0);
30 
31    /* wait for timestamp writes */
32    cs_wait_slot(b, SB_ID(DEFERRED_SYNC), false);
33 
34    /* Depending on where this is called from, we could potentially use SR
35     * registers or copy with a compute job.
36     */
37    const struct cs_index dst_addr_reg = cs_scratch_reg64(b, 0);
38    const struct cs_index src_addr_reg = cs_scratch_reg64(b, 2);
39    const uint32_t temp_count = CS_REG_SCRATCH_COUNT - 4;
40 
41    while (size) {
42       cs_move64_to(b, dst_addr_reg, dst_addr);
43       cs_move64_to(b, src_addr_reg, src_addr);
44 
45       const uint32_t max_offset = 1 << 16;
46       uint32_t copy_count = MIN2(size, max_offset) / sizeof(uint32_t);
47       uint32_t offset = 0;
48       while (copy_count) {
49          const uint32_t count = MIN2(copy_count, temp_count);
50          const struct cs_index reg = cs_scratch_reg_tuple(b, 4, count);
51 
52          cs_load_to(b, reg, src_addr_reg, BITFIELD_MASK(count), offset);
53          cs_wait_slot(b, SB_ID(LS), false);
54          cs_store(b, reg, dst_addr_reg, BITFIELD_MASK(count), offset);
55 
56          copy_count -= count;
57          offset += count * sizeof(uint32_t);
58       }
59 
60       dst_addr += offset;
61       src_addr += offset;
62       size -= offset;
63    }
64 
65    cs_wait_slot(b, SB_ID(LS), false);
66 }
67 
68 static struct cs_builder *
get_builder(struct panvk_cmd_buffer * cmdbuf,struct u_trace * ut)69 get_builder(struct panvk_cmd_buffer *cmdbuf, struct u_trace *ut)
70 {
71    const uint32_t subqueue = ut - cmdbuf->utrace.uts;
72    assert(subqueue < PANVK_SUBQUEUE_COUNT);
73 
74    return panvk_get_cs_builder(cmdbuf, subqueue);
75 }
76 
77 static void
panvk_utrace_record_ts(struct u_trace * ut,void * cs,void * timestamps,uint64_t offset_B,uint32_t flags)78 panvk_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
79                        uint64_t offset_B, uint32_t flags)
80 {
81    struct cs_builder *b = get_builder(cs, ut);
82    const struct panvk_priv_bo *bo = timestamps;
83    const uint64_t addr = bo->addr.dev + offset_B;
84 
85    cmd_write_timestamp(b, addr);
86 }
87 
88 void
panvk_per_arch(utrace_context_init)89 panvk_per_arch(utrace_context_init)(struct panvk_device *dev)
90 {
91    u_trace_context_init(&dev->utrace.utctx, dev, sizeof(uint64_t), 0,
92                         panvk_utrace_create_buffer, panvk_utrace_delete_buffer,
93                         panvk_utrace_record_ts, panvk_utrace_read_ts, NULL,
94                         NULL, panvk_utrace_delete_flush_data);
95 }
96 
97 void
panvk_per_arch(utrace_context_fini)98 panvk_per_arch(utrace_context_fini)(struct panvk_device *dev)
99 {
100    u_trace_context_fini(&dev->utrace.utctx);
101 }
102 
103 void
panvk_per_arch(utrace_copy_buffer)104 panvk_per_arch(utrace_copy_buffer)(struct u_trace_context *utctx,
105                                    void *cmdstream, void *ts_from,
106                                    uint64_t from_offset, void *ts_to,
107                                    uint64_t to_offset, uint64_t size_B)
108 {
109    struct cs_builder *b = cmdstream;
110    const struct panvk_priv_bo *src_bo = ts_from;
111    const struct panvk_priv_bo *dst_bo = ts_to;
112    const uint64_t src_addr = src_bo->addr.dev + from_offset;
113    const uint64_t dst_addr = dst_bo->addr.dev + to_offset;
114 
115    cmd_copy_data(b, dst_addr, src_addr, size_B);
116 }
117 
118 void
panvk_per_arch(utrace_clone_init_pool)119 panvk_per_arch(utrace_clone_init_pool)(struct panvk_pool *pool,
120                                        struct panvk_device *dev)
121 {
122    const struct panvk_pool_properties pool_props = {
123       .slab_size = 64 * 1024,
124       .label = "utrace clone pool",
125       .owns_bos = true,
126    };
127    panvk_pool_init(pool, dev, NULL, &pool_props);
128 }
129 
130 static struct cs_buffer
alloc_clone_buffer(void * cookie)131 alloc_clone_buffer(void *cookie)
132 {
133    struct panvk_pool *pool = cookie;
134    const uint32_t size = 4 * 1024;
135    const uint32_t alignment = 64;
136 
137    struct panfrost_ptr ptr =
138       pan_pool_alloc_aligned(&pool->base, size, alignment);
139 
140    return (struct cs_buffer){
141       .cpu = ptr.cpu,
142       .gpu = ptr.gpu,
143       .capacity = size,
144    };
145 }
146 
147 void
panvk_per_arch(utrace_clone_init_builder)148 panvk_per_arch(utrace_clone_init_builder)(struct cs_builder *b,
149                                           struct panvk_pool *pool)
150 {
151    const struct cs_builder_conf builder_conf = {
152       .nr_registers = 96,
153       .nr_kernel_registers = 4,
154       .alloc_buffer = alloc_clone_buffer,
155       .cookie = pool,
156    };
157    cs_builder_init(b, &builder_conf, (struct cs_buffer){0});
158 }
159 
160 void
panvk_per_arch(utrace_clone_finish_builder)161 panvk_per_arch(utrace_clone_finish_builder)(struct cs_builder *b)
162 {
163    const struct cs_index flush_id = cs_scratch_reg32(b, 0);
164 
165    cs_move32_to(b, flush_id, 0);
166    cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_NONE, false,
167                    flush_id, cs_defer(SB_IMM_MASK, SB_ID(IMM_FLUSH)));
168    cs_wait_slot(b, SB_ID(IMM_FLUSH), false);
169 
170    cs_finish(b);
171 }
172