/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 * Copyright 2020 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

8 #ifndef AC_SQTT_H
9 #define AC_SQTT_H
10 
11 #include <stdint.h>
12 #include <stdbool.h>
13 
14 #include <assert.h>
15 #include "ac_pm4.h"
16 #include "ac_rgp.h"
17 #include "amd_family.h"
18 
/* Opaque types defined elsewhere; this header only uses pointers to them. */
struct radeon_cmdbuf;
struct radeon_info;

22 /**
23  * SQ Thread tracing is a tracing mechanism that allows taking a detailed look
24  * at what the shader cores are doing.
25  *
26  * Among the things recorded are:
27  *  - draws/dispatches + state
28  *  - when each wave starts and stops.
29  *  - for one SIMD per SE all instructions executed on that SIMD.
30  *
31  * The hardware stores all these as events in a buffer, no manual barrier
32  * around each command needed. The primary user of this is RGP.
33  */
34 struct ac_sqtt {
35    struct radeon_cmdbuf *start_cs[2];
36    struct radeon_cmdbuf *stop_cs[2];
37    /* struct radeon_winsys_bo or struct pb_buffer */
38    void *bo;
39    uint64_t buffer_va;
40    void *ptr;
41    uint32_t buffer_size;
42    int start_frame;
43    char *trigger_file;
44    bool instruction_timing_enabled;
45 
46    uint32_t cmdbuf_ids_per_queue[AMD_NUM_IP_TYPES];
47 
48    struct rgp_code_object rgp_code_object;
49    struct rgp_loader_events rgp_loader_events;
50    struct rgp_pso_correlation rgp_pso_correlation;
51 
52    struct rgp_queue_info rgp_queue_info;
53    struct rgp_queue_event rgp_queue_event;
54 
55    struct rgp_clock_calibration rgp_clock_calibration;
56 
57    struct hash_table_u64 *pipeline_bos;
58 };
59 
/* Shift of 12, i.e. 1 << 12 == 4096.
 * NOTE(review): presumably the log2 alignment of SQTT buffer addresses/sizes
 * programmed into the hardware -- confirm against the users of this macro.
 */
#define SQTT_BUFFER_ALIGN_SHIFT 12

/**
 * Three-dword status record describing one trace.
 *
 * The third dword is shared: it is named gfx9_write_counter or
 * gfx10_dropped_cntr depending on which name the caller uses; both names
 * alias the same storage.
 */
struct ac_sqtt_data_info {
   uint32_t cur_offset;
   uint32_t trace_status;
   union {
      uint32_t gfx9_write_counter;
      uint32_t gfx10_dropped_cntr;
   };
};

71 struct ac_sqtt_data_se {
72    struct ac_sqtt_data_info info;
73    void *data_ptr;
74    uint32_t shader_engine;
75    uint32_t compute_unit;
76 };
77 
78 #define SQTT_MAX_TRACES 6
79 
80 struct ac_sqtt_trace {
81    const struct rgp_code_object *rgp_code_object;
82    const struct rgp_loader_events *rgp_loader_events;
83    const struct rgp_pso_correlation *rgp_pso_correlation;
84    const struct rgp_queue_info *rgp_queue_info;
85    const struct rgp_queue_event *rgp_queue_event;
86    const struct rgp_clock_calibration *rgp_clock_calibration;
87 
88    uint32_t num_traces;
89    struct ac_sqtt_data_se traces[SQTT_MAX_TRACES];
90 };
91 
/*
 * NOTE(review): the one-line descriptions below are derived from the
 * signatures alone; confirm details against the implementation.
 */

/* Returns a 64-bit offset for shader engine index `se`. */
uint64_t ac_sqtt_get_info_offset(unsigned se);

/* Returns a 64-bit offset for shader engine index `se`, computed from the
 * GPU info and the SQTT context (neither is modified: both are const).
 */
uint64_t ac_sqtt_get_data_offset(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt,
                                 unsigned se);

/* Initializes the SQTT context `data`. */
void ac_sqtt_init(struct ac_sqtt *data);

/* Tears down the SQTT context `data`; presumably the counterpart of
 * ac_sqtt_init().
 */
void ac_sqtt_finish(struct ac_sqtt *data);

/* Reports whether the trace described by `info` is complete. */
bool ac_is_sqtt_complete(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt,
                         const struct ac_sqtt_data_info *info);

/**
 * Identifiers for RGP SQ thread-tracing markers (Table 1).
 *
 * All sixteen values of the 4-bit `identifier` field used by the marker
 * structs are enumerated, including the reserved ones.
 */
enum rgp_sqtt_marker_identifier
{
   RGP_SQTT_MARKER_IDENTIFIER_EVENT = 0x0,
   RGP_SQTT_MARKER_IDENTIFIER_CB_START = 0x1,
   RGP_SQTT_MARKER_IDENTIFIER_CB_END = 0x2,
   RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START = 0x3,
   RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END = 0x4,
   RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT = 0x5,
   RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API = 0x6,
   RGP_SQTT_MARKER_IDENTIFIER_SYNC = 0x7,
   RGP_SQTT_MARKER_IDENTIFIER_PRESENT = 0x8,
   RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION = 0x9,
   RGP_SQTT_MARKER_IDENTIFIER_RENDER_PASS = 0xA,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED2 = 0xB,
   RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE = 0xC,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED4 = 0xD,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED5 = 0xE,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED6 = 0xF
};

/**
 * Command buffer IDs used in RGP SQ thread-tracing markers (only 20 bits).
 *
 * Two encodings share the first bit-field, `per_frame`, which selects the
 * layout in use. `all` gives access to the whole 32-bit value.
 */
union rgp_sqtt_marker_cb_id {
   struct {
      uint32_t per_frame : 1; /* Must be 1, frame-based command buffer ID. */
      uint32_t frame_index : 7;
      uint32_t cb_index : 12; /* Command buffer index within the frame. */
      uint32_t reserved : 12;
   } per_frame_cb_id;

   struct {
      uint32_t per_frame : 1; /* Must be 0, global command buffer ID. */
      uint32_t cb_index : 19; /* Global command buffer index. */
      uint32_t reserved : 12;
   } global_cb_id;

   uint32_t all;
};

/**
 * RGP SQ thread-tracing marker for the start of a command buffer. (Table 2)
 *
 * Four dwords. Each dwordNN member aliases the named field(s) of the same
 * union so the marker can be filled by field and read back as raw dwords.
 */
struct rgp_sqtt_marker_cb_start {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;
         uint32_t queue : 5;
      };
      uint32_t dword01;
   };
   union {
      uint32_t device_id_low;
      uint32_t dword02;
   };
   union {
      uint32_t device_id_high;
      uint32_t dword03;
   };
   union {
      uint32_t queue_flags;
      uint32_t dword04;
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_cb_start) == 16,
              "rgp_sqtt_marker_cb_start doesn't match RGP spec");

/**
 * RGP SQ thread-tracing marker for the end of a command buffer. (Table 3)
 *
 * Three dwords. Each dwordNN member aliases the named field(s) of the same
 * union.
 */
struct rgp_sqtt_marker_cb_end {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;
         uint32_t reserved : 5;
      };
      uint32_t dword01;
   };
   union {
      uint32_t device_id_low;
      uint32_t dword02;
   };
   union {
      uint32_t device_id_high;
      uint32_t dword03;
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_cb_end) == 12,
              "rgp_sqtt_marker_cb_end doesn't match RGP spec");

/**
 * API types used in RGP SQ thread-tracing markers for the "General API"
 * packet.
 *
 * The numeric values are part of the marker encoding and must not be
 * renumbered. Value 46 is intentionally unused.
 */
enum rgp_sqtt_marker_general_api_type
{
   ApiCmdBindPipeline = 0,
   ApiCmdBindDescriptorSets = 1,
   ApiCmdBindIndexBuffer = 2,
   ApiCmdBindVertexBuffers = 3,
   ApiCmdDraw = 4,
   ApiCmdDrawIndexed = 5,
   ApiCmdDrawIndirect = 6,
   ApiCmdDrawIndexedIndirect = 7,
   ApiCmdDrawIndirectCountAMD = 8,
   ApiCmdDrawIndexedIndirectCountAMD = 9,
   ApiCmdDispatch = 10,
   ApiCmdDispatchIndirect = 11,
   ApiCmdCopyBuffer = 12,
   ApiCmdCopyImage = 13,
   ApiCmdBlitImage = 14,
   ApiCmdCopyBufferToImage = 15,
   ApiCmdCopyImageToBuffer = 16,
   ApiCmdUpdateBuffer = 17,
   ApiCmdFillBuffer = 18,
   ApiCmdClearColorImage = 19,
   ApiCmdClearDepthStencilImage = 20,
   ApiCmdClearAttachments = 21,
   ApiCmdResolveImage = 22,
   ApiCmdWaitEvents = 23,
   ApiCmdPipelineBarrier = 24,
   ApiCmdBeginQuery = 25,
   ApiCmdEndQuery = 26,
   ApiCmdResetQueryPool = 27,
   ApiCmdWriteTimestamp = 28,
   ApiCmdCopyQueryPoolResults = 29,
   ApiCmdPushConstants = 30,
   ApiCmdBeginRenderPass = 31,
   ApiCmdNextSubpass = 32,
   ApiCmdEndRenderPass = 33,
   ApiCmdExecuteCommands = 34,
   ApiCmdSetViewport = 35,
   ApiCmdSetScissor = 36,
   ApiCmdSetLineWidth = 37,
   ApiCmdSetDepthBias = 38,
   ApiCmdSetBlendConstants = 39,
   ApiCmdSetDepthBounds = 40,
   ApiCmdSetStencilCompareMask = 41,
   ApiCmdSetStencilWriteMask = 42,
   ApiCmdSetStencilReference = 43,
   ApiCmdDrawIndirectCount = 44,
   ApiCmdDrawIndexedIndirectCount = 45,
   /* gap */
   ApiCmdDrawMeshTasksEXT = 47,
   ApiCmdDrawMeshTasksIndirectCountEXT = 48,
   ApiCmdDrawMeshTasksIndirectEXT = 49,

   /* Single bit (bit 23), well above the sequential IDs.
    * NOTE(review): presumably OR-ed into another type value as a flag --
    * confirm at the use sites.
    */
   ApiRayTracingSeparateCompiled = 0x800000,
   ApiInvalid = 0xffffffff
};

/**
 * RGP SQ thread-tracing marker for a "General API" instrumentation packet.
 *
 * One dword; `dword01` aliases the bit-fields. Note that `api_type` is
 * 20 bits wide here.
 */
struct rgp_sqtt_marker_general_api {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t api_type : 20;
         uint32_t is_end : 1;
         uint32_t reserved : 4;
      };
      uint32_t dword01;
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_general_api) == 4,
              "rgp_sqtt_marker_general_api doesn't match RGP spec");

/**
 * API types used in RGP SQ thread-tracing markers (Table 16).
 *
 * The numeric values are part of the marker encoding and must not be
 * renumbered. Values 29 and 37-40 are intentionally unused.
 */
enum rgp_sqtt_marker_event_type
{
   EventCmdDraw = 0,
   EventCmdDrawIndexed = 1,
   EventCmdDrawIndirect = 2,
   EventCmdDrawIndexedIndirect = 3,
   EventCmdDrawIndirectCountAMD = 4,
   EventCmdDrawIndexedIndirectCountAMD = 5,
   EventCmdDispatch = 6,
   EventCmdDispatchIndirect = 7,
   EventCmdCopyBuffer = 8,
   EventCmdCopyImage = 9,
   EventCmdBlitImage = 10,
   EventCmdCopyBufferToImage = 11,
   EventCmdCopyImageToBuffer = 12,
   EventCmdUpdateBuffer = 13,
   EventCmdFillBuffer = 14,
   EventCmdClearColorImage = 15,
   EventCmdClearDepthStencilImage = 16,
   EventCmdClearAttachments = 17,
   EventCmdResolveImage = 18,
   EventCmdWaitEvents = 19,
   EventCmdPipelineBarrier = 20,
   EventCmdResetQueryPool = 21,
   EventCmdCopyQueryPoolResults = 22,
   EventRenderPassColorClear = 23,
   EventRenderPassDepthStencilClear = 24,
   EventRenderPassResolve = 25,
   EventInternalUnknown = 26,
   EventCmdDrawIndirectCount = 27,
   EventCmdDrawIndexedIndirectCount = 28,
   /* gap */
   EventCmdTraceRaysKHR = 30,
   EventCmdTraceRaysIndirectKHR = 31,
   EventCmdBuildAccelerationStructuresKHR = 32,
   EventCmdBuildAccelerationStructuresIndirectKHR = 33,
   EventCmdCopyAccelerationStructureKHR = 34,
   EventCmdCopyAccelerationStructureToMemoryKHR = 35,
   EventCmdCopyMemoryToAccelerationStructureKHR = 36,
   /* gap */
   EventCmdDrawMeshTasksEXT = 41,
   EventCmdDrawMeshTasksIndirectCountEXT = 42,
   EventCmdDrawMeshTasksIndirectEXT = 43,
   EventUnknown = 0x7fff,
   EventInvalid = 0xffffffff
};

/**
 * "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker. (Table 4)
 *
 * Three dwords; each dwordNN member aliases the named field(s) of the same
 * union. Note that `api_type` is 24 bits wide here.
 */
struct rgp_sqtt_marker_event {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t api_type : 24;
         uint32_t has_thread_dims : 1;
      };
      uint32_t dword01;
   };
   union {
      struct {
         uint32_t cb_id : 20;
         uint32_t vertex_offset_reg_idx : 4;
         uint32_t instance_offset_reg_idx : 4;
         uint32_t draw_index_reg_idx : 4;
      };
      uint32_t dword02;
   };
   union {
      uint32_t cmd_id;
      uint32_t dword03;
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_event) == 12,
              "rgp_sqtt_marker_event doesn't match RGP spec");

365 /**
366  * Per-dispatch specific marker where workgroup dims are included.
367  */
368 struct rgp_sqtt_marker_event_with_dims {
369    struct rgp_sqtt_marker_event event;
370    uint32_t thread_x;
371    uint32_t thread_y;
372    uint32_t thread_z;
373 };
374 
375 static_assert(sizeof(struct rgp_sqtt_marker_event_with_dims) == 24,
376               "rgp_sqtt_marker_event_with_dims doesn't match RGP spec");
377 
/**
 * "Barrier Start" RGP SQTT instrumentation marker (Table 5)
 *
 * Two dwords; each dwordNN member aliases the bit-fields of the same union.
 */
struct rgp_sqtt_marker_barrier_start {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;
         uint32_t reserved : 5;
      };
      uint32_t dword01;
   };
   union {
      struct {
         uint32_t driver_reason : 31;
         uint32_t internal : 1;
      };
      uint32_t dword02;
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_barrier_start) == 8,
              "rgp_sqtt_marker_barrier_start doesn't match RGP spec");

/**
 * "Barrier End" RGP SQTT instrumentation marker (Table 6)
 *
 * Two dwords of mostly one-bit flags plus a 16-bit layout-transition count;
 * each dwordNN member aliases the bit-fields of the same union.
 */
struct rgp_sqtt_marker_barrier_end {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;
         uint32_t wait_on_eop_ts : 1;
         uint32_t vs_partial_flush : 1;
         uint32_t ps_partial_flush : 1;
         uint32_t cs_partial_flush : 1;
         uint32_t pfp_sync_me : 1;
      };
      uint32_t dword01;
   };
   union {
      struct {
         uint32_t sync_cp_dma : 1;
         uint32_t inval_tcp : 1;
         uint32_t inval_sqI : 1;
         uint32_t inval_sqK : 1;
         uint32_t flush_tcc : 1;
         uint32_t inval_tcc : 1;
         uint32_t flush_cb : 1;
         uint32_t inval_cb : 1;
         uint32_t flush_db : 1;
         uint32_t inval_db : 1;
         uint32_t num_layout_transitions : 16;
         uint32_t inval_gl1 : 1;
         uint32_t wait_on_ts : 1;
         uint32_t eop_ts_bottom_of_pipe : 1;
         uint32_t eos_ts_ps_done : 1;
         uint32_t eos_ts_cs_done : 1;
         uint32_t reserved : 1;
      };
      uint32_t dword02;
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_barrier_end) == 8,
              "rgp_sqtt_marker_barrier_end doesn't match RGP spec");

/**
 * "Layout Transition" RGP SQTT instrumentation marker (Table 7)
 *
 * Two dwords: the first carries eight one-bit flags after the common
 * identifier/ext_dwords header, the second is entirely reserved.
 */
struct rgp_sqtt_marker_layout_transition {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t depth_stencil_expand : 1;
         uint32_t htile_hiz_range_expand : 1;
         uint32_t depth_stencil_resummarize : 1;
         uint32_t dcc_decompress : 1;
         uint32_t fmask_decompress : 1;
         uint32_t fast_clear_eliminate : 1;
         uint32_t fmask_color_expand : 1;
         uint32_t init_mask_ram : 1;
         uint32_t reserved1 : 17;
      };
      uint32_t dword01;
   };
   union {
      struct {
         uint32_t reserved2 : 32;
      };
      uint32_t dword02;
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_layout_transition) == 8,
              "rgp_sqtt_marker_layout_transition doesn't match RGP spec");

/**
 * "User Event" RGP SQTT instrumentation marker (Table 8)
 *
 * One dword; `dword01` aliases the bit-fields. Unlike the other markers
 * there is no ext_dwords field after the identifier.
 */
struct rgp_sqtt_marker_user_event {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t reserved0 : 8;
         uint32_t data_type : 8;
         uint32_t reserved1 : 12;
      };
      uint32_t dword01;
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_user_event) == 4,
              "rgp_sqtt_marker_user_event doesn't match RGP spec");

/**
 * User event marker followed by one extra dword holding a length.
 * NOTE(review): presumably the length of a string payload that follows the
 * marker -- confirm at the emit sites.
 */
struct rgp_sqtt_marker_user_event_with_length {
   struct rgp_sqtt_marker_user_event user_event;
   uint32_t length;
};

/**
 * Kinds of user event.
 * NOTE(review): presumably the values stored in
 * rgp_sqtt_marker_user_event::data_type -- confirm at the emit sites.
 *
 * Values are spelled out because they are written into markers; they are
 * the same values the implicit numbering produced.
 */
enum rgp_sqtt_marker_user_event_type
{
   UserEventTrigger = 0,
   UserEventPop = 1,
   UserEventPush = 2,
   UserEventObjectName = 3,
};

/**
 * "Pipeline bind" RGP SQTT instrumentation marker (Table 12)
 *
 * Three dwords. The 64-bit API PSO hash is stored as two dwords:
 * api_pso_hash[0] aliases dword02 and api_pso_hash[1] aliases dword03.
 */
struct rgp_sqtt_marker_pipeline_bind {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t bind_point : 1;
         uint32_t cb_id : 20;
         uint32_t reserved : 4;
      };
      uint32_t dword01;
   };
   union {
      uint32_t api_pso_hash[2];
      struct {
         uint32_t dword02;
         uint32_t dword03;
      };
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_pipeline_bind) == 12,
              "rgp_sqtt_marker_pipeline_bind doesn't match RGP spec");

/*
 * NOTE(review): the one-line descriptions below are derived from the
 * signatures alone; confirm details (including the meaning of the bool
 * results, presumably success/failure) against the implementation.
 */

/* Records a pipeline-hash / API-hash pair in the SQTT context. */
bool ac_sqtt_add_pso_correlation(struct ac_sqtt *sqtt, uint64_t pipeline_hash, uint64_t api_hash);

/* Records a code-object loader event for `pipeline_hash` at `base_address`. */
bool ac_sqtt_add_code_object_loader_event(struct ac_sqtt *sqtt, uint64_t pipeline_hash,
                                          uint64_t base_address);

/* Records a CPU/GPU timestamp pair in the SQTT context. */
bool ac_sqtt_add_clock_calibration(struct ac_sqtt *sqtt, uint64_t cpu_timestamp,
                                   uint64_t gpu_timestamp);

/* Checks the profile state for the given GPU info. */
bool ac_check_profile_state(const struct radeon_info *info);

/* Returns the next command buffer ID for `ip_type`; takes a non-const
 * context, so it presumably advances cmdbuf_ids_per_queue.
 */
union rgp_sqtt_marker_cb_id ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt *sqtt,
                                                       enum amd_ip_type ip_type);

/* Fills `sqtt_trace` from the SQTT context. */
bool ac_sqtt_get_trace(struct ac_sqtt *sqtt, const struct radeon_info *info,
                       struct ac_sqtt_trace *sqtt_trace);

/* Returns a 32-bit control value for the given GPU; `enable` selects the
 * enabled or disabled variant.
 */
uint32_t ac_sqtt_get_ctrl(const struct radeon_info *info, bool enable);

/* Returns a 32-bit shader mask for the given GPU. */
uint32_t ac_sqtt_get_shader_mask(const struct radeon_info *info);

/* The three emitters below append to the PM4 state `pm4`; is_compute_queue
 * selects the queue flavour being targeted.
 */
void ac_sqtt_emit_start(const struct radeon_info *info, struct ac_pm4_state *pm4,
                        const struct ac_sqtt *sqtt, bool is_compute_queue);

void ac_sqtt_emit_stop(const struct radeon_info *info, struct ac_pm4_state *pm4,
                       bool is_compute_queue);

void ac_sqtt_emit_wait(const struct radeon_info *info, struct ac_pm4_state *pm4,
                       const struct ac_sqtt *sqtt, bool is_compute_queue);

564 #endif
565