/*
 * Copyright © 2020 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <inttypes.h>
#include <stdarg.h>

#include "freedreno_log.h"
#include "freedreno_batch.h"
#include "freedreno_context.h"

#include "util/u_fifo.h"
/* A simple(ish) logging mechanism with timestamps recorded at the
 * corresponding point in the cmdstream.  The overall design is based
 * on fd_log_chunk's, which buffer up to 'msgs_per_chunk' log msgs
 * and track an associated bo containing timestamps written from the
 * GPU.
 *
 * The fd_batch tracks a list of fd_log_chunk's.  When fd_log() is
 * called, msgs are pushed into the last (most recent) chunk until
 * it is full, at which point a new chunk is created.  And cmds are
 * inserted into the cmdstream to record the GPU timestamp that
 * corresponds with the log msg.
 *
 * When the batch is flushed, the list of log chunks is transferred
 * to the end of fd_context's list of chunks, and we attempt to pop
 * chunks from the front of the list if their timestamp bo is idle (ie.
 * the GPU has finished the batch that was writing the timestamps).
 *
 * NOTE: this is only appropriate for IB1 (ie. "gmem" level) cmdstream;
 * the IB2 (draw/binning) cmdstream can be executed multiple times,
 * which this mechanism is not designed to support.  Other existing
 * GL level queries (time-elapsed, amd-perfcntrs) are more appropriate
 * for profiling at that level.
 */
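
/* Illustrative usage (a sketch; the fd_log()/fd_log_eof() entry points are
 * declared in freedreno_log.h, and the msg strings here are arbitrary):
 *
 *    fd_log(batch, "START BINNING");
 *    ... emit binning cmds into the batch ...
 *    fd_log(batch, "END BINNING");
 *    ...
 *    // at the end of the frame:
 *    fd_log_eof(ctx);
 *
 * Each fd_log() call queues the formatted msg on the batch's current chunk
 * and (if anything was emitted to the ring since the previous msg) inserts
 * a timestamp write into the cmdstream at that point.
 */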

enum {
   bo_size = 0x1000,
   msgs_per_chunk = bo_size / sizeof(uint64_t),
};

struct fd_log_chunk {
   struct list_head node;

   unsigned num_msgs;
   struct util_fifo *msg_fifo;

   /* list of recorded 64b timestamps */
   struct fd_bo *timestamps_bo;

   bool last;      /* this chunk is last in submit */
   bool eof;       /* this chunk is last in frame */
   uint32_t *ring_cur;
};

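/* Get a chunk with room to append another msg: either the most recent
 * chunk on the batch (if it is not yet full) or a freshly created one:
 */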
static struct fd_log_chunk *
get_chunk(struct fd_batch *batch)
{
   struct fd_log_chunk *chunk;

   /* do we currently have a non-full chunk to append msgs to? */
   if (!list_is_empty(&batch->log_chunks)) {
      chunk = list_last_entry(&batch->log_chunks,
            struct fd_log_chunk, node);
      if (chunk->num_msgs < msgs_per_chunk)
         return chunk;
      /* we need to expand to add another chunk to the batch, so
       * the current one is no longer the last one of the batch:
       */
      chunk->last = false;
   }

   /* .. if not, then create a new one: */
   chunk = calloc(1, sizeof(*chunk));
   chunk->msg_fifo = u_fifo_create(msgs_per_chunk);
   chunk->timestamps_bo = fd_bo_new(batch->ctx->screen->dev, bo_size,
         DRM_FREEDRENO_GEM_TYPE_KMEM, "timestamps");
   chunk->last = true;

   list_addtail(&chunk->node, &batch->log_chunks);

   return chunk;
}

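/* Free a chunk once all of its msgs have been consumed, removing it from
 * whichever list (batch or context) it is currently on:
 */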
static void
free_chunk(struct fd_log_chunk *chunk)
{
   assert(chunk->msg_fifo->num == 0);
   u_fifo_destroy(chunk->msg_fifo);
   fd_bo_del(chunk->timestamps_bo);
   list_del(&chunk->node);
   free(chunk);
}

/* state to accumulate time across N chunks associated with a single batch: */
struct times {
   uint64_t last_time_ns;
   uint64_t first_time_ns;
};

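/* Print the msgs of a single chunk, converting each recorded GPU timestamp
 * to ns and accumulating per-batch elapsed time in 't':
 */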
static void
process_chunk(struct fd_context *ctx, struct fd_log_chunk *chunk,
      struct times *t)
{
   /* For first chunk of batch, accumulated times will be zeroed: */
   if (!t->last_time_ns) {
      fprintf(ctx->log_out,
            "+----- TS -----+ +----- NS -----+ +-- Δ --+ +----- MSG -----\n");
   }

   uint64_t *timestamps = fd_bo_map(chunk->timestamps_bo);
   unsigned n = 0;
   char *msg;

   while (u_fifo_pop(chunk->msg_fifo, (void **)&msg)) {
      uint64_t ts = timestamps[n++];
      uint64_t ns = ctx->ts_to_ns(ts);
      int32_t delta;

      if (!t->first_time_ns)
         t->first_time_ns = ns;

      if (ns) {
         delta = t->last_time_ns ? ns - t->last_time_ns : 0;
         t->last_time_ns = ns;
      } else {
         /* we skipped recording the timestamp, so it should be
          * the same as last msg:
          */
         ns = t->last_time_ns;
         delta = 0;
      }

      fprintf(ctx->log_out, "%016"PRIu64" %016"PRIu64" %+9d: %s\n",
            ts, ns, delta, msg);
      free(msg);
   }

   if (chunk->last) {
      fprintf(ctx->log_out, "ELAPSED: %"PRIu64" ns\n",
            t->last_time_ns - t->first_time_ns);
      memset(t, 0, sizeof(*t));
   }

   if (chunk->eof)
      fprintf(ctx->log_out, "END OF FRAME %u\n", ctx->frame_nr++);
}

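/* Drain the context's list of chunks, printing each one whose timestamp bo
 * the GPU has finished writing (or all of them, if 'wait' is set):
 */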
void
fd_log_process(struct fd_context *ctx, bool wait)
{
   struct times times = {0};

   while (!list_is_empty(&ctx->log_chunks)) {
      struct fd_log_chunk *chunk = list_first_entry(&ctx->log_chunks,
            struct fd_log_chunk, node);

      unsigned flags = DRM_FREEDRENO_PREP_READ;
      if (!wait)
         flags |= DRM_FREEDRENO_PREP_NOSYNC;

      int ret = fd_bo_cpu_prep(chunk->timestamps_bo, ctx->pipe, flags);
      if (ret)
         break;

      process_chunk(ctx, chunk, &times);
      free_chunk(chunk);
   }

   /* We expect that the last processed chunk was the last in its
    * batch, which should reset the times:
    */
   if (times.last_time_ns) {
      fprintf(ctx->log_out, "WARNING: last processed chunk not last in batch?\n");
   }

   fflush(ctx->log_out);
}

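/* Called when a batch is flushed: move the batch's chunks onto the context's
 * list, where fd_log_process() picks them up once the GPU is done with them:
 */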
void
fd_log_flush(struct fd_batch *batch)
{
   /* transfer batch's log chunks to context: */
   list_splicetail(&batch->log_chunks, &batch->ctx->log_chunks);
   list_inithead(&batch->log_chunks);
}

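/* Append a formatted msg to the batch's current chunk, and emit cmds to
 * record the corresponding GPU timestamp into the chunk's timestamp bo.
 * Normally reached via the fd_log() wrapper in freedreno_log.h; like
 * fd_log_eof() below, logging only does anything when the FD_DBG_LOG
 * debug flag is set.
 */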
void
_fd_log(struct fd_batch *batch, const char *fmt, ...)
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->nondraw ? batch->draw : batch->gmem;
   struct fd_log_chunk *chunk = get_chunk(batch);
   char *msg;

   va_list ap;
   va_start(ap, fmt);
   if (vasprintf(&msg, fmt, ap) < 0) {
      va_end(ap);
      return;
   }
   va_end(ap);

   u_fifo_add(chunk->msg_fifo, msg);

   assert(ctx->record_timestamp);

   /* If nothing else has been emitted to the ring since the last log msg,
    * skip emitting another timestamp.
    */
   if (ring->cur == chunk->ring_cur) {
      uint64_t *ts = fd_bo_map(chunk->timestamps_bo);
      /* zero signifies an invalid timestamp to process_chunk(), so it
       * will use the last valid timestamp for this msg instead.
       */
      ts[chunk->num_msgs] = 0;
   } else {
      ctx->record_timestamp(ring, chunk->timestamps_bo,
            chunk->num_msgs * sizeof(uint64_t));
   }

   chunk->num_msgs++;
   chunk->ring_cur = ring->cur;
}
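
/* Mark the most recent chunk as the last one of the frame, so that
 * process_chunk() prints the frame boundary, and consume any chunks
 * that are already idle:
 */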
void
fd_log_eof(struct fd_context *ctx)
{
   if (!(fd_mesa_debug & FD_DBG_LOG))
      return;

   if (list_is_empty(&ctx->log_chunks)) {
      fprintf(ctx->log_out, "WARNING: no log chunks!\n");
      return;
   }

   struct fd_log_chunk *last_chunk = list_last_entry(&ctx->log_chunks,
         struct fd_log_chunk, node);
   last_chunk->eof = true;

   /* and process any chunks that are now idle/ready: */
   fd_log_process(ctx, false);
}