• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2020 Advanced Micro Devices, Inc.
3  * Copyright 2020 Valve Corporation
4  *
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #ifndef AC_SQTT_H
9 #define AC_SQTT_H
10 
11 #include <stdint.h>
12 #include <stdbool.h>
13 
14 #include <assert.h>
15 #include "ac_pm4.h"
16 #include "ac_rgp.h"
17 #include "amd_family.h"
18 
19 struct radeon_cmdbuf;
20 struct radeon_info;
21 
22 /**
23  * SQ Thread tracing is a tracing mechanism that allows taking a detailed look
24  * at what the shader cores are doing.
25  *
26  * Among the things recorded are:
27  *  - draws/dispatches + state
28  *  - when each wave starts and stops.
29  *  - for one SIMD per SE all instructions executed on that SIMD.
30  *
31  * The hardware stores all these as events in a buffer, so no manual barrier
32  * is needed around each command. The primary user of this is RGP.
33  */
34 struct ac_sqtt {
35    struct radeon_cmdbuf *start_cs[2]; /* two start command buffers; NOTE(review): presumably one per queue kind (cf. is_compute_queue in ac_sqtt_emit_start) -- confirm */
36    struct radeon_cmdbuf *stop_cs[2]; /* two stop command buffers; NOTE(review): presumably indexed the same way as start_cs -- confirm */
37    /* struct radeon_winsys_bo or struct pb_buffer */
38    void *bo; /* opaque handle; the real type is one of the two named in the comment above */
39    uint64_t buffer_va; /* NOTE(review): presumably the GPU virtual address of the trace buffer -- confirm */
40    void *ptr; /* NOTE(review): presumably a CPU-visible pointer to the trace buffer -- confirm */
41    uint32_t buffer_size;
42    int start_frame;
43    char *trigger_file;
44    bool instruction_timing_enabled;
45 
46    uint32_t cmdbuf_ids_per_queue[AMD_NUM_IP_TYPES]; /* one slot per amd_ip_type; NOTE(review): presumably the counters behind ac_sqtt_get_next_cmdbuf_id() -- confirm */
47    /* RGP records are stored by value here; struct ac_sqtt_trace refers to the same record types through const pointers. */
48    struct rgp_code_object rgp_code_object;
49    struct rgp_loader_events rgp_loader_events;
50    struct rgp_pso_correlation rgp_pso_correlation;
51 
52    struct rgp_queue_info rgp_queue_info;
53    struct rgp_queue_event rgp_queue_event;
54 
55    struct rgp_clock_calibration rgp_clock_calibration;
56 
57    struct hash_table_u64 *pipeline_bos; /* only used as a pointer here, so the incomplete type is sufficient for this header */
58 };
59 
60 struct ac_sqtt_data_info { /* three 32-bit words of status for one trace (embedded in struct ac_sqtt_data_se) */
61    uint32_t cur_offset;
62    uint32_t trace_status;
63    union { /* third word: a single storage slot with one name per GPU generation */
64       uint32_t gfx9_write_counter;
65       uint32_t gfx10_dropped_cntr;
66    };
67 };
68 
69 struct ac_sqtt_data_se { /* one entry of ac_sqtt_trace::traces: status words plus a pointer to the trace data */
70    struct ac_sqtt_data_info info;
71    void *data_ptr;
72    uint32_t shader_engine; /* NOTE(review): presumably the index of the shader engine this trace came from -- confirm */
73    uint32_t compute_unit; /* NOTE(review): presumably the index of the traced compute unit -- confirm */
74 };
75 
76 #define SQTT_MAX_TRACES 6
77 /* SQTT_MAX_TRACES is the fixed capacity of ac_sqtt_trace::traces below. */
78 struct ac_sqtt_trace { /* read-only view of the RGP records plus up to SQTT_MAX_TRACES trace entries */
79    const struct rgp_code_object *rgp_code_object;
80    const struct rgp_loader_events *rgp_loader_events;
81    const struct rgp_pso_correlation *rgp_pso_correlation;
82    const struct rgp_queue_info *rgp_queue_info;
83    const struct rgp_queue_event *rgp_queue_event;
84    const struct rgp_clock_calibration *rgp_clock_calibration;
85 
86    uint32_t num_traces; /* NOTE(review): presumably the number of valid entries in traces[] (at most SQTT_MAX_TRACES) -- confirm */
87    struct ac_sqtt_data_se traces[SQTT_MAX_TRACES];
88 };
89 
90 uint64_t ac_sqtt_get_info_offset(unsigned se); /* NOTE(review): presumably the offset of the ac_sqtt_data_info record for shader engine `se` -- confirm against the implementation */
91 
92 uint64_t ac_sqtt_get_data_offset(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt,
93                                  unsigned se); /* NOTE(review): presumably the offset of the trace data for shader engine `se`; neither input struct is modified (both const) */
94 
95 void ac_sqtt_init(struct ac_sqtt *data); /* NOTE(review): presumably must be paired with ac_sqtt_finish() -- confirm */
96 
97 void ac_sqtt_finish(struct ac_sqtt *data);
98 
99 bool ac_is_sqtt_complete(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt,
100                          const struct ac_sqtt_data_info *info); /* NOTE(review): presumably reports whether the trace described by `info` was captured completely -- confirm */
101 
102 /**
103  * Identifiers for RGP SQ thread-tracing markers (Table 1)
104  */
105 enum rgp_sqtt_marker_identifier
106 { /* values span 0x0-0xF, i.e. exactly what the 4-bit `identifier` bit-field at the start of each marker struct can hold */
107    RGP_SQTT_MARKER_IDENTIFIER_EVENT = 0x0,
108    RGP_SQTT_MARKER_IDENTIFIER_CB_START = 0x1,
109    RGP_SQTT_MARKER_IDENTIFIER_CB_END = 0x2,
110    RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START = 0x3,
111    RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END = 0x4,
112    RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT = 0x5,
113    RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API = 0x6,
114    RGP_SQTT_MARKER_IDENTIFIER_SYNC = 0x7,
115    RGP_SQTT_MARKER_IDENTIFIER_PRESENT = 0x8,
116    RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION = 0x9,
117    RGP_SQTT_MARKER_IDENTIFIER_RENDER_PASS = 0xA,
118    RGP_SQTT_MARKER_IDENTIFIER_RESERVED2 = 0xB,
119    RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE = 0xC,
120    RGP_SQTT_MARKER_IDENTIFIER_RESERVED4 = 0xD,
121    RGP_SQTT_MARKER_IDENTIFIER_RESERVED5 = 0xE,
122    RGP_SQTT_MARKER_IDENTIFIER_RESERVED6 = 0xF
123 };
124 
125 /**
126  * Command buffer IDs used in RGP SQ thread-tracing markers (only 20 bits).
127  */
128 union rgp_sqtt_marker_cb_id {
129    struct { /* per-frame form: 1 + 7 + 12 = 20 used bits, upper 12 bits reserved */
130       uint32_t per_frame : 1; /* Must be 1, frame-based command buffer ID. */
131       uint32_t frame_index : 7;
132       uint32_t cb_index : 12; /* Command buffer index within the frame. */
133       uint32_t reserved : 12;
134    } per_frame_cb_id;
135 
136    struct { /* global form: 1 + 19 = 20 used bits, upper 12 bits reserved */
137       uint32_t per_frame : 1; /* Must be 0, global command buffer ID. */
138       uint32_t cb_index : 19; /* Global command buffer index. */
139       uint32_t reserved : 12;
140    } global_cb_id;
141 
142    uint32_t all; /* the whole ID as one raw 32-bit value, overlapping both forms */
143 };
144 
145 /**
146  * RGP SQ thread-tracing marker for the start of a command buffer. (Table 2)
147  */
148 struct rgp_sqtt_marker_cb_start {
149    union { /* dword01: the bit-fields overlap the raw 32-bit word (4 + 3 + 20 + 5 = 32) */
150       struct {
151          uint32_t identifier : 4; /* NOTE(review): presumably RGP_SQTT_MARKER_IDENTIFIER_CB_START -- confirm at the writer */
152          uint32_t ext_dwords : 3; /* NOTE(review): presumably the number of dwords following dword01 -- confirm */
153          uint32_t cb_id : 20; /* same width as the used bits of union rgp_sqtt_marker_cb_id */
154          uint32_t queue : 5;
155       };
156       uint32_t dword01;
157    };
158    union { /* dword02 */
159       uint32_t device_id_low;
160       uint32_t dword02;
161    };
162    union { /* dword03 */
163       uint32_t device_id_high;
164       uint32_t dword03;
165    };
166    union { /* dword04 */
167       uint32_t queue_flags;
168       uint32_t dword04;
169    };
170 };
171 
172 static_assert(sizeof(struct rgp_sqtt_marker_cb_start) == 16,
173               "rgp_sqtt_marker_cb_start doesn't match RGP spec"); /* four 32-bit words */
174 
175 /**
176  * RGP SQ thread-tracing marker for the end of a command buffer. (Table 3)
177  * Compared with rgp_sqtt_marker_cb_start: `queue` becomes `reserved` and there is no queue_flags dword.
178  */
179 struct rgp_sqtt_marker_cb_end {
180    union { /* dword01: the bit-fields overlap the raw 32-bit word (4 + 3 + 20 + 5 = 32) */
181       struct {
182          uint32_t identifier : 4;
183          uint32_t ext_dwords : 3;
184          uint32_t cb_id : 20;
185          uint32_t reserved : 5;
186       };
187       uint32_t dword01;
188    };
189    union { /* dword02 */
190       uint32_t device_id_low;
191       uint32_t dword02;
192    };
193    union { /* dword03 */
194       uint32_t device_id_high;
195       uint32_t dword03;
196    };
197 };
198 
199 static_assert(sizeof(struct rgp_sqtt_marker_cb_end) == 12,
200               "rgp_sqtt_marker_cb_end doesn't match RGP spec"); /* three 32-bit words */
201 
202 /**
203  * API types used in RGP SQ thread-tracing markers for the "General API"
204  * packet.
205  */
206 enum rgp_sqtt_marker_general_api_type
207 { /* values are explicit and must not be renumbered; the struct carrying them has a 20-bit api_type field */
208    ApiCmdBindPipeline = 0,
209    ApiCmdBindDescriptorSets = 1,
210    ApiCmdBindIndexBuffer = 2,
211    ApiCmdBindVertexBuffers = 3,
212    ApiCmdDraw = 4,
213    ApiCmdDrawIndexed = 5,
214    ApiCmdDrawIndirect = 6,
215    ApiCmdDrawIndexedIndirect = 7,
216    ApiCmdDrawIndirectCountAMD = 8,
217    ApiCmdDrawIndexedIndirectCountAMD = 9,
218    ApiCmdDispatch = 10,
219    ApiCmdDispatchIndirect = 11,
220    ApiCmdCopyBuffer = 12,
221    ApiCmdCopyImage = 13,
222    ApiCmdBlitImage = 14,
223    ApiCmdCopyBufferToImage = 15,
224    ApiCmdCopyImageToBuffer = 16,
225    ApiCmdUpdateBuffer = 17,
226    ApiCmdFillBuffer = 18,
227    ApiCmdClearColorImage = 19,
228    ApiCmdClearDepthStencilImage = 20,
229    ApiCmdClearAttachments = 21,
230    ApiCmdResolveImage = 22,
231    ApiCmdWaitEvents = 23,
232    ApiCmdPipelineBarrier = 24,
233    ApiCmdBeginQuery = 25,
234    ApiCmdEndQuery = 26,
235    ApiCmdResetQueryPool = 27,
236    ApiCmdWriteTimestamp = 28,
237    ApiCmdCopyQueryPoolResults = 29,
238    ApiCmdPushConstants = 30,
239    ApiCmdBeginRenderPass = 31,
240    ApiCmdNextSubpass = 32,
241    ApiCmdEndRenderPass = 33,
242    ApiCmdExecuteCommands = 34,
243    ApiCmdSetViewport = 35,
244    ApiCmdSetScissor = 36,
245    ApiCmdSetLineWidth = 37,
246    ApiCmdSetDepthBias = 38,
247    ApiCmdSetBlendConstants = 39,
248    ApiCmdSetDepthBounds = 40,
249    ApiCmdSetStencilCompareMask = 41,
250    ApiCmdSetStencilWriteMask = 42,
251    ApiCmdSetStencilReference = 43,
252    ApiCmdDrawIndirectCount = 44,
253    ApiCmdDrawIndexedIndirectCount = 45,
254    /* gap: value 46 is not assigned in this enum */
255    ApiCmdDrawMeshTasksEXT = 47,
256    ApiCmdDrawMeshTasksIndirectCountEXT = 48,
257    ApiCmdDrawMeshTasksIndirectEXT = 49,
258    /* NOTE(review): 0x800000 is bit 23: too wide for the 20-bit api_type of rgp_sqtt_marker_general_api, but it fits the 24-bit api_type of rgp_sqtt_marker_event; presumably a flag for the latter -- confirm */
259    ApiRayTracingSeparateCompiled = 0x800000,
260    ApiInvalid = 0xffffffff /* NOTE(review): not representable as a 32-bit int, so this relies on the compiler accepting wider enumerator values (strict pre-C23 ISO C requires int range) */
261 };
262 
263 /**
264  * RGP SQ thread-tracing marker for a "General API" instrumentation packet.
265  */
266 struct rgp_sqtt_marker_general_api {
267    union { /* single dword: 4 + 3 + 20 + 1 + 4 = 32 bits */
268       struct {
269          uint32_t identifier : 4;
270          uint32_t ext_dwords : 3;
271          uint32_t api_type : 20; /* NOTE(review): presumably an enum rgp_sqtt_marker_general_api_type value -- confirm */
272          uint32_t is_end : 1; /* NOTE(review): presumably distinguishes the end marker from the begin marker of an API call -- confirm */
273          uint32_t reserved : 4;
274       };
275       uint32_t dword01;
276    };
277 };
278 
279 static_assert(sizeof(struct rgp_sqtt_marker_general_api) == 4,
280               "rgp_sqtt_marker_general_api doesn't match RGP spec"); /* one 32-bit word */
281 
282 /**
283  * API types used in RGP SQ thread-tracing markers (Table 16).
284  */
285 enum rgp_sqtt_marker_event_type
286 { /* values are explicit and must not be renumbered; rgp_sqtt_marker_event has a 24-bit api_type field */
287    EventCmdDraw = 0,
288    EventCmdDrawIndexed = 1,
289    EventCmdDrawIndirect = 2,
290    EventCmdDrawIndexedIndirect = 3,
291    EventCmdDrawIndirectCountAMD = 4,
292    EventCmdDrawIndexedIndirectCountAMD = 5,
293    EventCmdDispatch = 6,
294    EventCmdDispatchIndirect = 7,
295    EventCmdCopyBuffer = 8,
296    EventCmdCopyImage = 9,
297    EventCmdBlitImage = 10,
298    EventCmdCopyBufferToImage = 11,
299    EventCmdCopyImageToBuffer = 12,
300    EventCmdUpdateBuffer = 13,
301    EventCmdFillBuffer = 14,
302    EventCmdClearColorImage = 15,
303    EventCmdClearDepthStencilImage = 16,
304    EventCmdClearAttachments = 17,
305    EventCmdResolveImage = 18,
306    EventCmdWaitEvents = 19,
307    EventCmdPipelineBarrier = 20,
308    EventCmdResetQueryPool = 21,
309    EventCmdCopyQueryPoolResults = 22,
310    EventRenderPassColorClear = 23,
311    EventRenderPassDepthStencilClear = 24,
312    EventRenderPassResolve = 25,
313    EventInternalUnknown = 26,
314    EventCmdDrawIndirectCount = 27,
315    EventCmdDrawIndexedIndirectCount = 28,
316    /* gap: value 29 is not assigned in this enum */
317    EventCmdTraceRaysKHR = 30,
318    EventCmdTraceRaysIndirectKHR = 31,
319    EventCmdBuildAccelerationStructuresKHR = 32,
320    EventCmdBuildAccelerationStructuresIndirectKHR = 33,
321    EventCmdCopyAccelerationStructureKHR = 34,
322    EventCmdCopyAccelerationStructureToMemoryKHR = 35,
323    EventCmdCopyMemoryToAccelerationStructureKHR = 36,
324    /* gap: values 37-40 are not assigned in this enum */
325    EventCmdDrawMeshTasksEXT = 41,
326    EventCmdDrawMeshTasksIndirectCountEXT = 42,
327    EventCmdDrawMeshTasksIndirectEXT = 43,
328    EventUnknown = 0x7fff,
329    EventInvalid = 0xffffffff /* NOTE(review): not representable as a 32-bit int, so this relies on the compiler accepting wider enumerator values (strict pre-C23 ISO C requires int range) */
330 };
331 
332 /**
333  * "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker. (Table 4)
334  */
335 struct rgp_sqtt_marker_event {
336    union { /* dword01: 4 + 3 + 24 + 1 = 32 bits */
337       struct {
338          uint32_t identifier : 4;
339          uint32_t ext_dwords : 3;
340          uint32_t api_type : 24; /* 24 bits here, versus 20 in rgp_sqtt_marker_general_api */
341          uint32_t has_thread_dims : 1; /* NOTE(review): presumably set when the marker is the leading part of rgp_sqtt_marker_event_with_dims -- confirm */
342       };
343       uint32_t dword01;
344    };
345    union { /* dword02: 20 + 4 + 4 + 4 = 32 bits */
346       struct {
347          uint32_t cb_id : 20;
348          uint32_t vertex_offset_reg_idx : 4;
349          uint32_t instance_offset_reg_idx : 4;
350          uint32_t draw_index_reg_idx : 4;
351       };
352       uint32_t dword02;
353    };
354    union { /* dword03 */
355       uint32_t cmd_id;
356       uint32_t dword03;
357    };
358 };
359 
360 static_assert(sizeof(struct rgp_sqtt_marker_event) == 12,
361               "rgp_sqtt_marker_event doesn't match RGP spec"); /* three 32-bit words */
362 
363 /**
364  * Per-dispatch specific marker where workgroup dims are included.
365  */
366 struct rgp_sqtt_marker_event_with_dims {
367    struct rgp_sqtt_marker_event event; /* the plain 12-byte event marker comes first */
368    uint32_t thread_x; /* three extra dwords follow the event marker */
369    uint32_t thread_y;
370    uint32_t thread_z;
371 };
372 
373 static_assert(sizeof(struct rgp_sqtt_marker_event_with_dims) == 24,
374               "rgp_sqtt_marker_event_with_dims doesn't match RGP spec"); /* 12-byte event + 3 * 4 bytes */
375 
376 /**
377  * "Barrier Start" RGP SQTT instrumentation marker (Table 5)
378  */
379 struct rgp_sqtt_marker_barrier_start {
380    union { /* dword01: 4 + 3 + 20 + 5 = 32 bits */
381       struct {
382          uint32_t identifier : 4;
383          uint32_t ext_dwords : 3;
384          uint32_t cb_id : 20;
385          uint32_t reserved : 5;
386       };
387       uint32_t dword01;
388    };
389    union { /* dword02: 31-bit reason code plus one flag bit */
390       struct {
391          uint32_t driver_reason : 31;
392          uint32_t internal : 1;
393       };
394       uint32_t dword02;
395    };
396 };
397 
398 static_assert(sizeof(struct rgp_sqtt_marker_barrier_start) == 8,
399               "rgp_sqtt_marker_barrier_start doesn't match RGP spec"); /* two 32-bit words */
400 
401 /**
402  * "Barrier End" RGP SQTT instrumentation marker (Table 6)
403  */
404 struct rgp_sqtt_marker_barrier_end {
405    union { /* dword01: 4 + 3 + 20 bits, then five 1-bit flags where barrier_start has `reserved : 5` */
406       struct {
407          uint32_t identifier : 4;
408          uint32_t ext_dwords : 3;
409          uint32_t cb_id : 20;
410          uint32_t wait_on_eop_ts : 1;
411          uint32_t vs_partial_flush : 1;
412          uint32_t ps_partial_flush : 1;
413          uint32_t cs_partial_flush : 1;
414          uint32_t pfp_sync_me : 1;
415       };
416       uint32_t dword01;
417    };
418    union { /* dword02: ten 1-bit flags, a 16-bit count, five more 1-bit flags and one reserved bit (10 + 16 + 5 + 1 = 32) */
419       struct {
420          uint32_t sync_cp_dma : 1;
421          uint32_t inval_tcp : 1;
422          uint32_t inval_sqI : 1;
423          uint32_t inval_sqK : 1;
424          uint32_t flush_tcc : 1;
425          uint32_t inval_tcc : 1;
426          uint32_t flush_cb : 1;
427          uint32_t inval_cb : 1;
428          uint32_t flush_db : 1;
429          uint32_t inval_db : 1;
430          uint32_t num_layout_transitions : 16; /* NOTE(review): presumably the number of rgp_sqtt_marker_layout_transition markers tied to this barrier -- confirm */
431          uint32_t inval_gl1 : 1;
432          uint32_t wait_on_ts : 1;
433          uint32_t eop_ts_bottom_of_pipe : 1;
434          uint32_t eos_ts_ps_done : 1;
435          uint32_t eos_ts_cs_done : 1;
436          uint32_t reserved : 1;
437       };
438       uint32_t dword02;
439    };
440 };
441 
442 static_assert(sizeof(struct rgp_sqtt_marker_barrier_end) == 8,
443               "rgp_sqtt_marker_barrier_end doesn't match RGP spec"); /* two 32-bit words */
444 
445 /**
446  * "Layout Transition" RGP SQTT instrumentation marker (Table 7)
447  */
448 struct rgp_sqtt_marker_layout_transition {
449    union { /* dword01: 4 + 3 bits, eight 1-bit transition flags, 17 reserved bits */
450       struct {
451          uint32_t identifier : 4;
452          uint32_t ext_dwords : 3;
453          uint32_t depth_stencil_expand : 1;
454          uint32_t htile_hiz_range_expand : 1;
455          uint32_t depth_stencil_resummarize : 1;
456          uint32_t dcc_decompress : 1;
457          uint32_t fmask_decompress : 1;
458          uint32_t fast_clear_eliminate : 1;
459          uint32_t fmask_color_expand : 1;
460          uint32_t init_mask_ram : 1;
461          uint32_t reserved1 : 17;
462       };
463       uint32_t dword01;
464    };
465    union { /* dword02: entirely reserved */
466       struct {
467          uint32_t reserved2 : 32;
468       };
469       uint32_t dword02;
470    };
471 };
472 
473 static_assert(sizeof(struct rgp_sqtt_marker_layout_transition) == 8,
474               "rgp_sqtt_marker_layout_transition doesn't match RGP spec"); /* two 32-bit words */
475 
476 
477 /**
478  * "User Event" RGP SQTT instrumentation marker (Table 8)
479  */
480 struct rgp_sqtt_marker_user_event {
481    union { /* single dword: 4 + 8 + 8 + 12 = 32 bits; unlike the other markers there is no ext_dwords field */
482       struct {
483          uint32_t identifier : 4;
484          uint32_t reserved0 : 8;
485          uint32_t data_type : 8; /* NOTE(review): presumably an enum rgp_sqtt_marker_user_event_type value -- confirm */
486          uint32_t reserved1 : 12;
487       };
488       uint32_t dword01;
489    };
490 };
491 struct rgp_sqtt_marker_user_event_with_length { /* user-event dword followed by one length dword */
492    struct rgp_sqtt_marker_user_event user_event;
493    uint32_t length; /* NOTE(review): unit and what it measures are not visible in this header -- confirm at the writer */
494 };
495 /* NOTE(review): only the plain user_event size is asserted below; the _with_length variant (4 + 4 = 8 bytes) has no size assert. */
496 static_assert(sizeof(struct rgp_sqtt_marker_user_event) == 4,
497               "rgp_sqtt_marker_user_event doesn't match RGP spec"); /* one 32-bit word */
498 
499 enum rgp_sqtt_marker_user_event_type
500 { /* only the first value is explicit; the rest follow sequentially, so order matters */
501    UserEventTrigger = 0,
502    UserEventPop, /* = 1 */
503    UserEventPush, /* = 2 */
504    UserEventObjectName, /* = 3 */
505 };
506 
507 /**
508  * "Pipeline bind" RGP SQTT instrumentation marker (Table 12)
509  */
510 struct rgp_sqtt_marker_pipeline_bind {
511    union { /* dword01: 4 + 3 + 1 + 20 + 4 = 32 bits */
512       struct {
513          uint32_t identifier : 4;
514          uint32_t ext_dwords : 3;
515          uint32_t bind_point : 1; /* one bit, so only two bind points can be encoded */
516          uint32_t cb_id : 20;
517          uint32_t reserved : 4;
518       };
519       uint32_t dword01;
520    };
521    union { /* dword02/dword03 overlap api_pso_hash[0]/api_pso_hash[1] */
522       uint32_t api_pso_hash[2]; /* NOTE(review): presumably the two halves of a 64-bit API PSO hash (cf. api_hash in ac_sqtt_add_pso_correlation); which half is first is not visible here -- confirm */
523       struct {
524          uint32_t dword02;
525          uint32_t dword03;
526       };
527    };
528 };
529 
530 static_assert(sizeof(struct rgp_sqtt_marker_pipeline_bind) == 12,
531               "rgp_sqtt_marker_pipeline_bind doesn't match RGP spec"); /* three 32-bit words */
532 
533 bool ac_sqtt_add_pso_correlation(struct ac_sqtt *sqtt, uint64_t pipeline_hash, uint64_t api_hash); /* NOTE(review): presumably appends to sqtt->rgp_pso_correlation and returns false on failure -- confirm */
534 
535 bool ac_sqtt_add_code_object_loader_event(struct ac_sqtt *sqtt, uint64_t pipeline_hash,
536                                           uint64_t base_address); /* NOTE(review): presumably appends to sqtt->rgp_loader_events -- confirm */
537 
538 bool ac_sqtt_add_clock_calibration(struct ac_sqtt *sqtt, uint64_t cpu_timestamp,
539                                    uint64_t gpu_timestamp); /* NOTE(review): presumably appends to sqtt->rgp_clock_calibration -- confirm */
540 
541 bool ac_check_profile_state(const struct radeon_info *info); /* NOTE(review): meaning of the returned bool is not visible in this header -- confirm */
542 
543 union rgp_sqtt_marker_cb_id ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt *sqtt,
544                                                        enum amd_ip_type ip_type); /* takes sqtt non-const; NOTE(review): presumably advances sqtt->cmdbuf_ids_per_queue[ip_type] -- confirm */
545 
546 uint32_t ac_sqtt_get_buffer_align_shift(const struct radeon_info *info);
547 
548 bool ac_sqtt_get_trace(struct ac_sqtt *sqtt, const struct radeon_info *info,
549                        struct ac_sqtt_trace *sqtt_trace); /* sqtt_trace is the output parameter (the only non-const trace argument); NOTE(review): presumably false means the trace could not be retrieved -- confirm */
550 
551 uint32_t ac_sqtt_get_ctrl(const struct radeon_info *info, bool enable); /* NOTE(review): presumably a register value for enabling or disabling tracing -- confirm */
552 
553 uint32_t ac_sqtt_get_shader_mask(const struct radeon_info *info);
554 
555 void ac_sqtt_emit_start(const struct radeon_info *info, struct ac_pm4_state *pm4,
556                         const struct ac_sqtt *sqtt, bool is_compute_queue); /* pm4 is the only mutable argument; NOTE(review): presumably the commands are appended to it -- confirm */
557 
558 void ac_sqtt_emit_stop(const struct radeon_info *info, struct ac_pm4_state *pm4,
559                        bool is_compute_queue); /* unlike emit_start and emit_wait, takes no struct ac_sqtt */
560 
561 void ac_sqtt_emit_wait(const struct radeon_info *info, struct ac_pm4_state *pm4,
562                        const struct ac_sqtt *sqtt, bool is_compute_queue); /* same parameter list as ac_sqtt_emit_start */
563 
564 #endif
565