• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Google, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #ifndef _U_TRACE_H
25 #define _U_TRACE_H
26 
27 #include <stdbool.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 
31 #include "util/macros.h"
32 #include "util/u_atomic.h"
33 #include "util/u_queue.h"
34 
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38 
39 /* A trace mechanism (very) loosely inspired by the linux kernel tracepoint
40  * mechanism, in that it allows for defining driver specific (or common)
41  * tracepoints, which generate 'trace_$name()' functions that can be
42  * called at various points in commandstream emit.
43  *
44  * A printf backend is implemented, and a perfetto backend (available when
45  * built with HAVE_PERFETTO) allows shipping out traces to a tool like
46  * AGI.
47  *
48  * Notable differences:
49  *
50  *  - GPU timestamps!  A driver provided callback is used to emit timestamps
51  *    to a buffer.  At a later point in time (when stalling to wait for the
52  *    GPU is not required), the timestamps are re-united with the trace
53  *    payload.  This makes the trace mechanism suitable for profiling.
54  *
55  *  - Instead of a systemwide trace ringbuffer, buffering of un-retired
56  *    tracepoints is split into two stages.  Traces are emitted to a
57  *    'u_trace' instance, and at a later time flushed to a 'u_trace_context'
58  *    instance.  This avoids the requirement that commandstream containing
59  *    tracepoints is emitted in the same order as it is generated.
60  *
61  *    If the hw has multiple parallel "engines" (for example, 3d/blit/compute)
62  *    then a `u_trace_context` per-engine should be used.
63  *
64  *  - Unlike kernel tracepoints, u_trace tracepoints are defined in py
65  *    from which header and src files are generated.  Since we already have
66  *    a build dependency on python+mako, this gives more flexibility than
67  *    clunky preprocessor macro magic.
68  *
69  */
70 
71 struct u_trace_context;
72 struct u_trace;
73 struct u_trace_chunk;
74 struct u_trace_printer;
75 
76 /**
77  * Special reserved value to indicate that no timestamp was captured,
78  * and that the timestamp of the previous trace should be reused.
79  */
80 #define U_TRACE_NO_TIMESTAMP ((uint64_t) 0)
81 
/**
 * Driver provided callback to create a timestamp buffer which will be
 * read by the u_trace_read_ts callback.
 *
 * utctx is the trace context the buffer is being created for.
 *
 * NOTE(review): timestamps_count is presumably the number of timestamp
 * slots the buffer must be able to hold -- confirm against the caller.
 *
 * The return value is an opaque, driver-defined pointer; the other
 * callbacks take such a pointer as their 'timestamps' argument.
 */
typedef void *(*u_trace_create_ts_buffer)(struct u_trace_context *utctx,
                                          uint32_t timestamps_count);
88 
/**
 * Driver provided callback to delete a timestamp buffer.
 *
 * timestamps is the opaque buffer pointer to destroy (presumably one
 * previously returned by the u_trace_create_ts_buffer callback).
 */
typedef void (*u_trace_delete_ts_buffer)(struct u_trace_context *utctx,
                                         void *timestamps);
94 
/**
 * Driver provided callback to emit commands into the specified command
 * stream to capture a 64b timestamp into the specified timestamps buffer,
 * at the specified index.
 *
 * The hw counter that the driver records should be something that runs at
 * a fixed rate, even as the GPU freq changes.  The same source used for
 * GL_TIMESTAMP queries should be appropriate.
 *
 * cs is the (opaque, driver-defined) command stream to emit into,
 * timestamps is the opaque timestamp buffer and idx the slot to write.
 *
 * NOTE(review): end_of_pipe presumably selects whether the timestamp is
 * captured once preceding work has drained the pipeline rather than at
 * the top of the pipe -- confirm against the driver implementations.
 */
typedef void (*u_trace_record_ts)(struct u_trace *ut,
                                  void *cs,
                                  void *timestamps,
                                  unsigned idx,
                                  bool end_of_pipe);
109 
/**
 * Driver provided callback to read back a previously recorded timestamp.
 * If necessary, this should block until the GPU has finished writing back
 * the timestamps.  (The timestamps will be read back in order, so it is
 * safe to only synchronize on idx==0.)
 *
 * timestamps is the opaque timestamp buffer and idx the slot within it
 * to read.
 *
 * flush_data is data provided by the driver via u_trace_flush.
 *
 * The returned timestamp should be in units of nanoseconds.  The same
 * timebase as GL_TIMESTAMP queries should be used.
 *
 * The driver can return the special U_TRACE_NO_TIMESTAMP value to indicate
 * that no timestamp was captured and the timestamp from the previous trace
 * will be re-used.  (The first trace in the u_trace buf may not do this.)
 * This allows the driver to detect cases where multiple tracepoints are
 * emitted with no other intervening cmdstream, to avoid pointlessly
 * capturing the same timestamp multiple times in a row.
 */
typedef uint64_t (*u_trace_read_ts)(struct u_trace_context *utctx,
                                    void *timestamps,
                                    unsigned idx,
                                    void *flush_data);
132 
/**
 * Driver provided callback to delete flush data.
 *
 * flush_data is the driver data that was handed to u_trace_flush().
 */
typedef void (*u_trace_delete_flush_data)(struct u_trace_context *utctx,
                                          void *flush_data);
138 
/**
 * Bitmask describing which trace outputs/behaviours are enabled.
 *
 * The single-bit flags are listed first, followed by named combinations
 * of those flags.
 */
enum u_trace_type {
   /* Individual flags, one bit each. */
   U_TRACE_TYPE_PRINT           = 1u << 0,
   U_TRACE_TYPE_JSON            = 1u << 1,
   U_TRACE_TYPE_PERFETTO_ACTIVE = 1u << 2,
   U_TRACE_TYPE_PERFETTO_ENV    = 1u << 3,
   /* Emit markers into the command stream (see u_trace_markers_enabled()). */
   U_TRACE_TYPE_MARKERS         = 1u << 4,

   /* Named combinations of the flags above. */
   U_TRACE_TYPE_PRINT_JSON = U_TRACE_TYPE_PRINT | U_TRACE_TYPE_JSON,
   U_TRACE_TYPE_PERFETTO =
      U_TRACE_TYPE_PERFETTO_ACTIVE | U_TRACE_TYPE_PERFETTO_ENV,

   /* Mask of traces that require appending to the tracepoint chunk list. */
   U_TRACE_TYPE_REQUIRE_QUEUING =
      U_TRACE_TYPE_PRINT | U_TRACE_TYPE_PERFETTO,

   /* Mask of traces that require processing the tracepoint chunk list. */
   U_TRACE_TYPE_REQUIRE_PROCESSING =
      U_TRACE_TYPE_PRINT | U_TRACE_TYPE_PERFETTO_ACTIVE,
};
160 
/**
 * The trace context provides tracking for "in-flight" traces, once the
 * cmdstream that records timestamps has been flushed.
 *
 * If the hw has multiple parallel "engines", a u_trace_context per-engine
 * should be used.
 */
struct u_trace_context {
   /* All traces enabled in this context.  The inline helpers in this
    * header read this field with p_atomic_read_relaxed().
    */
   enum u_trace_type enabled_traces;

   /* NOTE(review): presumably the opaque driver pointer that was passed
    * as 'pctx' to u_trace_context_init() -- confirm.
    */
   void *pctx;

   /* Driver callbacks; see the corresponding typedefs for each contract. */
   u_trace_create_ts_buffer create_timestamp_buffer;
   u_trace_delete_ts_buffer delete_timestamp_buffer;
   u_trace_record_ts record_timestamp;
   u_trace_read_ts read_timestamp;
   u_trace_delete_flush_data delete_flush_data;

   /* NOTE(review): presumably the output stream and printer used by the
    * print backend -- confirm against the implementation.
    */
   FILE *out;
   struct u_trace_printer *out_printer;

   /* Once u_trace_flush() is called u_trace_chunk's are queued up to
    * render tracepoints on a queue.  The per-chunk queue jobs block until
    * timestamps are available.
    */
   struct util_queue queue;

#ifdef HAVE_PERFETTO
   /* node in global list of trace contexts. */
   struct list_head node;
#endif

   /* State to accumulate time across N chunks associated with a single
    * batch (u_trace).
    */
   uint64_t last_time_ns;
   uint64_t first_time_ns;

   /* NOTE(review): these look like running frame/batch/event counters
    * plus a frame-boundary flag used while processing chunks -- confirm
    * against the implementation.
    */
   uint32_t frame_nr;
   uint32_t batch_nr;
   uint32_t event_nr;
   bool start_of_frame;

   /* list of unprocessed trace chunks in fifo order: */
   struct list_head flushed_trace_chunks;
};
205 
/**
 * The u_trace ptr is passed as the first arg to generated tracepoints.
 * It provides buffering for tracepoint payload until the corresponding
 * driver cmdstream containing the emitted commands to capture is
 * flushed.
 *
 * Individual tracepoints emitted to u_trace are expected to be "executed"
 * (ie. timestamp captured) in FIFO order with respect to other tracepoints
 * emitted to the same u_trace.  But the order WRT other u_trace instances
 * is undefined until u_trace_flush().
 */
struct u_trace {
   /* Parent trace context that this u_trace is flushed to. */
   struct u_trace_context *utctx;

   /* NOTE(review): presumably the number of tracepoints recorded into
    * this u_trace so far -- confirm against the implementation.
    */
   uint32_t num_traces;

   /* list of unflushed trace chunks in fifo order */
   struct list_head
      trace_chunks;
};
225 
/**
 * Initialize a trace context.
 *
 * pctx is an opaque driver pointer.  The remaining arguments are the
 * driver provided callbacks; see the u_trace_create_ts_buffer,
 * u_trace_delete_ts_buffer, u_trace_record_ts, u_trace_read_ts and
 * u_trace_delete_flush_data typedefs for what each one must do.
 */
void u_trace_context_init(struct u_trace_context *utctx,
                          void *pctx,
                          u_trace_create_ts_buffer create_timestamp_buffer,
                          u_trace_delete_ts_buffer delete_timestamp_buffer,
                          u_trace_record_ts record_timestamp,
                          u_trace_read_ts read_timestamp,
                          u_trace_delete_flush_data delete_flush_data);

/**
 * Counterpart of u_trace_context_init().
 *
 * NOTE(review): presumably releases everything the context owns; what
 * happens to still-unprocessed chunks is not visible from this header.
 */
void u_trace_context_fini(struct u_trace_context *utctx);
234 
/**
 * Flush (trigger processing) of traces previously flushed to the
 * trace-context by u_trace_flush().
 *
 * This should typically be called in the driver's pctx->flush().
 *
 * NOTE(review): eof presumably signals an end-of-frame boundary (compare
 * frame_nr/start_of_frame in struct u_trace_context) -- confirm.
 */
void u_trace_context_process(struct u_trace_context *utctx, bool eof);
242 
/**
 * Initialize a u_trace instance whose parent trace context is utctx.
 */
void u_trace_init(struct u_trace *ut, struct u_trace_context *utctx);

/**
 * Counterpart of u_trace_init().
 */
void u_trace_fini(struct u_trace *ut);

/**
 * NOTE(review): takes no context argument, so this presumably sets up
 * process-global u_trace state -- confirm against the implementation.
 */
void u_trace_state_init(void);

/**
 * NOTE(review): takes no context argument, so this presumably tests the
 * given type bit(s) against process-global state rather than against a
 * particular u_trace_context -- confirm.
 */
bool u_trace_is_enabled(enum u_trace_type type);

/**
 * NOTE(review): presumably returns whether any tracepoints have been
 * emitted to ut -- confirm against the implementation.
 */
bool u_trace_has_points(struct u_trace *ut);
250 
/**
 * Position within the tracepoints of a u_trace, expressed as a chunk plus
 * an event index.  Obtained from u_trace_begin_iterator() or
 * u_trace_end_iterator() and passed by value.
 */
struct u_trace_iterator {
   /* The u_trace being iterated. */
   struct u_trace *ut;
   /* The trace chunk the iterator refers to. */
   struct u_trace_chunk *chunk;
   /* NOTE(review): presumably the index of the event within 'chunk'. */
   uint32_t event_idx;
};
256 
/**
 * Return an iterator for the start of ut's tracepoints.
 *
 * NOTE(review): exact position semantics (e.g. for an empty u_trace) are
 * not visible from this header.
 */
struct u_trace_iterator u_trace_begin_iterator(struct u_trace *ut);

/**
 * Return an iterator for the end of ut's tracepoints.
 *
 * NOTE(review): presumably "one past the last tracepoint emitted so far",
 * so that begin/end pairs delimit a range -- confirm.
 */
struct u_trace_iterator u_trace_end_iterator(struct u_trace *ut);

/**
 * Compare two iterators for equality.
 */
bool u_trace_iterator_equal(struct u_trace_iterator a,
                            struct u_trace_iterator b);
263 
/**
 * Driver provided callback, passed to u_trace_clone_append(), to copy
 * timestamps on the GPU from one timestamp buffer to another.
 *
 * cmdstream is the (opaque, driver-defined) command stream to emit the
 * copy into.  count entries are copied from ts_from, starting at
 * from_offset, to ts_to, starting at to_offset.
 *
 * NOTE(review): offsets and count are presumably expressed in timestamps
 * rather than bytes -- confirm against the caller.
 */
typedef void (*u_trace_copy_ts_buffer)(struct u_trace_context *utctx,
                                       void *cmdstream,
                                       void *ts_from,
                                       uint32_t from_offset,
                                       void *ts_to,
                                       uint32_t to_offset,
                                       uint32_t count);
271 
/**
 * Clones the tracepoint range delimited by begin_it and end_it into the
 * target u_trace 'into'.
 *
 * copy_ts_buffer is a driver callback used to copy the timestamps on the
 * GPU from one buffer to another; cmdstream is handed through to it.
 *
 * It allows:
 * - Tracing re-usable command buffer in Vulkan, by copying tracepoints
 *   each time it is submitted.
 * - Per-tile tracing for tiling GPUs, by copying a range of tracepoints
 *   corresponding to a tile.
 *
 * NOTE(review): whether end_it is inclusive or exclusive is not visible
 * from this header.
 */
void u_trace_clone_append(struct u_trace_iterator begin_it,
                          struct u_trace_iterator end_it,
                          struct u_trace *into,
                          void *cmdstream,
                          u_trace_copy_ts_buffer copy_ts_buffer);
288 
/**
 * NOTE(review): presumably marks the tracepoints between begin_it and
 * end_it as disabled so they are skipped when the traces are processed;
 * the exact effect and the inclusiveness of end_it are not visible from
 * this header -- confirm against the implementation.
 */
void u_trace_disable_event_range(struct u_trace_iterator begin_it,
                                 struct u_trace_iterator end_it);
291 
/**
 * Flush traces to the parent trace-context.  At this point, the expectation
 * is that all the tracepoints are "executed" by the GPU following any
 * previously flushed u_trace batch.
 *
 * flush_data is a way for driver to pass additional data, which becomes
 * available only at the point of flush, to the u_trace_read_ts callback and
 * perfetto. The typical example of such data would be a fence to wait on in
 * u_trace_read_ts, and a submission_id to pass into perfetto. The destruction
 * of the data is done via u_trace_delete_flush_data.
 *
 * NOTE(review): free_data presumably controls whether u_trace takes over
 * destroying flush_data (via the u_trace_delete_flush_data callback) --
 * confirm against the implementation.
 *
 * This should typically be called when the corresponding cmdstream
 * (containing the timestamp reads) is flushed to the kernel.
 */
void u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data);
307 
308 #ifdef HAVE_PERFETTO
309 static ALWAYS_INLINE bool
u_trace_perfetto_active(struct u_trace_context * utctx)310 u_trace_perfetto_active(struct u_trace_context *utctx)
311 {
312    return p_atomic_read_relaxed(&utctx->enabled_traces) &
313           U_TRACE_TYPE_PERFETTO_ACTIVE;
314 }
315 
/* Only declared when built with HAVE_PERFETTO.
 *
 * NOTE(review): presumably called when a perfetto tracing session starts
 * and stops, toggling U_TRACE_TYPE_PERFETTO_ACTIVE on the trace contexts
 * -- confirm against the implementation.
 */
void u_trace_perfetto_start(void);
void u_trace_perfetto_stop(void);
318 #else
319 static ALWAYS_INLINE bool
u_trace_perfetto_active(UNUSED struct u_trace_context * utctx)320 u_trace_perfetto_active(UNUSED struct u_trace_context *utctx)
321 {
322    return false;
323 }
324 #endif
325 
326 /**
327  * Return whether utrace is enabled at all or not, this can be used to
328  * gate any expensive traces.
329  */
330 static ALWAYS_INLINE bool
u_trace_enabled(struct u_trace_context * utctx)331 u_trace_enabled(struct u_trace_context *utctx)
332 {
333    return p_atomic_read_relaxed(&utctx->enabled_traces) != 0;
334 }
335 
336 /**
337  * Return whether chunks should be processed or not.
338  */
339 static ALWAYS_INLINE bool
u_trace_should_process(struct u_trace_context * utctx)340 u_trace_should_process(struct u_trace_context *utctx)
341 {
342    return p_atomic_read_relaxed(&utctx->enabled_traces) &
343           U_TRACE_TYPE_REQUIRE_PROCESSING;
344 }
345 
346 /**
347  * Return whether to emit markers into the command stream even if the queue
348  * isn't active.
349  */
350 static ALWAYS_INLINE bool
u_trace_markers_enabled(struct u_trace_context * utctx)351 u_trace_markers_enabled(struct u_trace_context *utctx)
352 {
353    return p_atomic_read_relaxed(&utctx->enabled_traces) &
354           U_TRACE_TYPE_MARKERS;
355 }
356 
357 #ifdef __cplusplus
358 }
359 #endif
360 
361 #endif /* _U_TRACE_H */
362