/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 * Copyright 2020 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */
7 
8 #ifndef AC_SQTT_H
9 #define AC_SQTT_H
10 
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#include "ac_rgp.h"
#include "amd_family.h"
17 
/* Opaque types defined elsewhere; this header only ever uses pointers to them. */
struct radeon_cmdbuf;
struct radeon_info;
20 
21 /**
22  * SQ Thread tracing is a tracing mechanism that allows taking a detailed look
23  * at what the shader cores are doing.
24  *
25  * Among the things recorded are:
26  *  - draws/dispatches + state
27  *  - when each wave starts and stops.
28  *  - for one SIMD per SE all instructions executed on that SIMD.
29  *
30  * The hardware stores all these as events in a buffer, no manual barrier
31  * around each command needed. The primary user of this is RGP.
32  */
33 struct ac_sqtt {
34    struct radeon_cmdbuf *start_cs[2];
35    struct radeon_cmdbuf *stop_cs[2];
36    /* struct radeon_winsys_bo or struct pb_buffer */
37    void *bo;
38    void *ptr;
39    uint32_t buffer_size;
40    int start_frame;
41    char *trigger_file;
42 
43    uint32_t cmdbuf_ids_per_queue[AMD_NUM_IP_TYPES];
44 
45    struct rgp_code_object rgp_code_object;
46    struct rgp_loader_events rgp_loader_events;
47    struct rgp_pso_correlation rgp_pso_correlation;
48 
49    struct rgp_queue_info rgp_queue_info;
50    struct rgp_queue_event rgp_queue_event;
51 
52    struct rgp_clock_calibration rgp_clock_calibration;
53 
54    struct hash_table_u64 *pipeline_bos;
55 };
56 
/* Alignment shift for SQTT buffers: 1 << 12 == 4096.
 * NOTE(review): the users of this constant are not visible in this header.
 */
#define SQTT_BUFFER_ALIGN_SHIFT 12
58 
/**
 * Three-dword status record for one trace.
 *
 * The third dword has two names for the same storage; which name applies
 * depends on the GPU generation, as the gfx9/gfx10 prefixes indicate.
 */
struct ac_sqtt_data_info {
   uint32_t cur_offset;
   uint32_t trace_status;
   union {
      uint32_t gfx9_write_counter;
      uint32_t gfx10_dropped_cntr;
   };
};

/**
 * One captured trace: its status record, a pointer to the trace data and the
 * shader engine / compute unit it belongs to.
 */
struct ac_sqtt_data_se {
   struct ac_sqtt_data_info info;
   void *data_ptr;
   uint32_t shader_engine;
   uint32_t compute_unit;
};

/* Capacity of ac_sqtt_trace::traces. */
#define SQTT_MAX_TRACES 6

/**
 * Read-only view of a capture: pointers to the RGP records plus the
 * per-trace data.
 */
struct ac_sqtt_trace {
   const struct rgp_code_object *rgp_code_object;
   const struct rgp_loader_events *rgp_loader_events;
   const struct rgp_pso_correlation *rgp_pso_correlation;
   const struct rgp_queue_info *rgp_queue_info;
   const struct rgp_queue_event *rgp_queue_event;
   const struct rgp_clock_calibration *rgp_clock_calibration;

   /* NOTE(review): presumably the number of valid entries in `traces`. */
   uint32_t num_traces;
   struct ac_sqtt_data_se traces[SQTT_MAX_TRACES];
};
88 
/*
 * Helpers locating the per-shader-engine (`se`) info and data regions.
 * The *_offset variants return an offset; the *_va variants additionally take
 * a base virtual address `va`.
 * NOTE(review): the actual buffer layout is defined by the implementation,
 * which is not visible in this header.
 */
uint64_t ac_sqtt_get_info_offset(unsigned se);

uint64_t ac_sqtt_get_data_offset(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt,
                                 unsigned se);
uint64_t ac_sqtt_get_info_va(uint64_t va, unsigned se);

uint64_t ac_sqtt_get_data_va(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt,
                             uint64_t va, unsigned se);

/* Set up / tear down the state held in `data`. */
void ac_sqtt_init(struct ac_sqtt *data);

void ac_sqtt_finish(struct ac_sqtt *data);

/* Report whether the trace described by `info` is complete. */
bool ac_is_sqtt_complete(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt,
                         const struct ac_sqtt_data_info *info);

/* NOTE(review): `rad_info` is non-const here, unlike in the sibling helpers;
 * it is left as-is because it must match the definition.
 */
uint32_t ac_get_expected_buffer_size(struct radeon_info *rad_info,
                                     const struct ac_sqtt_data_info *info);
107 
/**
 * Identifiers for RGP SQ thread-tracing markers (Table 1)
 *
 * The value goes into the 4-bit `identifier` field that starts every marker,
 * so all sixteen encodings 0x0..0xF are enumerated.
 */
enum rgp_sqtt_marker_identifier
{
   RGP_SQTT_MARKER_IDENTIFIER_EVENT = 0x0,
   RGP_SQTT_MARKER_IDENTIFIER_CB_START = 0x1,
   RGP_SQTT_MARKER_IDENTIFIER_CB_END = 0x2,
   RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START = 0x3,
   RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END = 0x4,
   RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT = 0x5,
   RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API = 0x6,
   RGP_SQTT_MARKER_IDENTIFIER_SYNC = 0x7,
   RGP_SQTT_MARKER_IDENTIFIER_PRESENT = 0x8,
   RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION = 0x9,
   RGP_SQTT_MARKER_IDENTIFIER_RENDER_PASS = 0xA,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED2 = 0xB,
   RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE = 0xC,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED4 = 0xD,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED5 = 0xE,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED6 = 0xF
};
130 
/**
 * Command buffer IDs used in RGP SQ thread-tracing markers (only 20 bits).
 *
 * Two encodings share the same 32-bit storage and are told apart by the first
 * bit-field (`per_frame`). In both, the meaningful fields add up to 20 bits,
 * which matches the 20-bit `cb_id` fields of the marker structs; the remaining
 * 12 bits are reserved.
 */
union rgp_sqtt_marker_cb_id {
   struct {
      uint32_t per_frame : 1; /* Must be 1, frame-based command buffer ID. */
      uint32_t frame_index : 7;
      uint32_t cb_index : 12; /* Command buffer index within the frame. */
      uint32_t reserved : 12;
   } per_frame_cb_id;

   struct {
      uint32_t per_frame : 1; /* Must be 0, global command buffer ID. */
      uint32_t cb_index : 19; /* Global command buffer index. */
      uint32_t reserved : 12;
   } global_cb_id;

   /* Raw view of the whole ID. */
   uint32_t all;
};

static_assert(sizeof(union rgp_sqtt_marker_cb_id) == 4,
              "rgp_sqtt_marker_cb_id doesn't match RGP spec");
150 
/**
 * RGP SQ thread-tracing marker for the start of a command buffer. (Table 2)
 *
 * Four dwords. Each dword can be accessed either through its named fields or
 * through the raw dwordNN alias in the same anonymous union.
 */
struct rgp_sqtt_marker_cb_start {
   union {
      struct {
         uint32_t identifier : 4; /* enum rgp_sqtt_marker_identifier */
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;     /* see union rgp_sqtt_marker_cb_id */
         uint32_t queue : 5;
      };
      uint32_t dword01;
   };
   union {
      uint32_t device_id_low;
      uint32_t dword02;
   };
   union {
      uint32_t device_id_high;
      uint32_t dword03;
   };
   union {
      uint32_t queue_flags;
      uint32_t dword04;
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_cb_start) == 16,
              "rgp_sqtt_marker_cb_start doesn't match RGP spec");
180 
/**
 * RGP SQ thread-tracing marker for the end of a command buffer. (Table 3)
 *
 * Three dwords; same layout as the first three dwords of
 * rgp_sqtt_marker_cb_start, except that the top 5 bits of dword01 are reserved.
 */
struct rgp_sqtt_marker_cb_end {
   union {
      struct {
         uint32_t identifier : 4; /* enum rgp_sqtt_marker_identifier */
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;     /* see union rgp_sqtt_marker_cb_id */
         uint32_t reserved : 5;
      };
      uint32_t dword01;
   };
   union {
      uint32_t device_id_low;
      uint32_t dword02;
   };
   union {
      uint32_t device_id_high;
      uint32_t dword03;
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_cb_end) == 12,
              "rgp_sqtt_marker_cb_end doesn't match RGP spec");
207 
/**
 * API types used in RGP SQ thread-tracing markers for the "General API"
 * packet.
 *
 * NOTE(review): ApiRayTracingSeparateCompiled (0x800000) needs 24 bits. It
 * does not fit the 20-bit api_type field of rgp_sqtt_marker_general_api, but it
 * does fit the 24-bit api_type field of rgp_sqtt_marker_event -- confirm the
 * intended use with the callers.
 */
enum rgp_sqtt_marker_general_api_type
{
   ApiCmdBindPipeline = 0,
   ApiCmdBindDescriptorSets = 1,
   ApiCmdBindIndexBuffer = 2,
   ApiCmdBindVertexBuffers = 3,
   ApiCmdDraw = 4,
   ApiCmdDrawIndexed = 5,
   ApiCmdDrawIndirect = 6,
   ApiCmdDrawIndexedIndirect = 7,
   ApiCmdDrawIndirectCountAMD = 8,
   ApiCmdDrawIndexedIndirectCountAMD = 9,
   ApiCmdDispatch = 10,
   ApiCmdDispatchIndirect = 11,
   ApiCmdCopyBuffer = 12,
   ApiCmdCopyImage = 13,
   ApiCmdBlitImage = 14,
   ApiCmdCopyBufferToImage = 15,
   ApiCmdCopyImageToBuffer = 16,
   ApiCmdUpdateBuffer = 17,
   ApiCmdFillBuffer = 18,
   ApiCmdClearColorImage = 19,
   ApiCmdClearDepthStencilImage = 20,
   ApiCmdClearAttachments = 21,
   ApiCmdResolveImage = 22,
   ApiCmdWaitEvents = 23,
   ApiCmdPipelineBarrier = 24,
   ApiCmdBeginQuery = 25,
   ApiCmdEndQuery = 26,
   ApiCmdResetQueryPool = 27,
   ApiCmdWriteTimestamp = 28,
   ApiCmdCopyQueryPoolResults = 29,
   ApiCmdPushConstants = 30,
   ApiCmdBeginRenderPass = 31,
   ApiCmdNextSubpass = 32,
   ApiCmdEndRenderPass = 33,
   ApiCmdExecuteCommands = 34,
   ApiCmdSetViewport = 35,
   ApiCmdSetScissor = 36,
   ApiCmdSetLineWidth = 37,
   ApiCmdSetDepthBias = 38,
   ApiCmdSetBlendConstants = 39,
   ApiCmdSetDepthBounds = 40,
   ApiCmdSetStencilCompareMask = 41,
   ApiCmdSetStencilWriteMask = 42,
   ApiCmdSetStencilReference = 43,
   ApiCmdDrawIndirectCount = 44,
   ApiCmdDrawIndexedIndirectCount = 45,
   /* gap: 46 is not assigned here */
   ApiCmdDrawMeshTasksEXT = 47,
   ApiCmdDrawMeshTasksIndirectCountEXT = 48,
   ApiCmdDrawMeshTasksIndirectEXT = 49,

   ApiRayTracingSeparateCompiled = 0x800000,
   ApiInvalid = 0xffffffff
};
268 
/**
 * RGP SQ thread-tracing marker for a "General API" instrumentation packet.
 *
 * A single dword, accessible through the bit-fields or the raw dword01 alias.
 */
struct rgp_sqtt_marker_general_api {
   union {
      struct {
         uint32_t identifier : 4; /* enum rgp_sqtt_marker_identifier */
         uint32_t ext_dwords : 3;
         uint32_t api_type : 20;  /* enum rgp_sqtt_marker_general_api_type */
         uint32_t is_end : 1;
         uint32_t reserved : 4;
      };
      uint32_t dword01;
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_general_api) == 4,
              "rgp_sqtt_marker_general_api doesn't match RGP spec");
287 
/**
 * API types used in RGP SQ thread-tracing markers (Table 16).
 *
 * The numbering has gaps (29 and 37..40 are not assigned here), so the
 * explicit values must be kept.
 */
enum rgp_sqtt_marker_event_type
{
   EventCmdDraw = 0,
   EventCmdDrawIndexed = 1,
   EventCmdDrawIndirect = 2,
   EventCmdDrawIndexedIndirect = 3,
   EventCmdDrawIndirectCountAMD = 4,
   EventCmdDrawIndexedIndirectCountAMD = 5,
   EventCmdDispatch = 6,
   EventCmdDispatchIndirect = 7,
   EventCmdCopyBuffer = 8,
   EventCmdCopyImage = 9,
   EventCmdBlitImage = 10,
   EventCmdCopyBufferToImage = 11,
   EventCmdCopyImageToBuffer = 12,
   EventCmdUpdateBuffer = 13,
   EventCmdFillBuffer = 14,
   EventCmdClearColorImage = 15,
   EventCmdClearDepthStencilImage = 16,
   EventCmdClearAttachments = 17,
   EventCmdResolveImage = 18,
   EventCmdWaitEvents = 19,
   EventCmdPipelineBarrier = 20,
   EventCmdResetQueryPool = 21,
   EventCmdCopyQueryPoolResults = 22,
   EventRenderPassColorClear = 23,
   EventRenderPassDepthStencilClear = 24,
   EventRenderPassResolve = 25,
   EventInternalUnknown = 26,
   EventCmdDrawIndirectCount = 27,
   EventCmdDrawIndexedIndirectCount = 28,
   /* gap */
   EventCmdTraceRaysKHR = 30,
   EventCmdTraceRaysIndirectKHR = 31,
   EventCmdBuildAccelerationStructuresKHR = 32,
   EventCmdBuildAccelerationStructuresIndirectKHR = 33,
   EventCmdCopyAccelerationStructureKHR = 34,
   EventCmdCopyAccelerationStructureToMemoryKHR = 35,
   EventCmdCopyMemoryToAccelerationStructureKHR = 36,
   /* gap */
   EventCmdDrawMeshTasksEXT = 41,
   EventCmdDrawMeshTasksIndirectCountEXT = 42,
   EventCmdDrawMeshTasksIndirectEXT = 43,
   EventUnknown = 0x7fff,
   EventInvalid = 0xffffffff
};
337 
/**
 * "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker. (Table 4)
 *
 * Three dwords, each accessible through its named fields or the raw dwordNN
 * alias.
 */
struct rgp_sqtt_marker_event {
   union {
      struct {
         uint32_t identifier : 4; /* enum rgp_sqtt_marker_identifier */
         uint32_t ext_dwords : 3;
         uint32_t api_type : 24;  /* enum rgp_sqtt_marker_event_type */
         uint32_t has_thread_dims : 1;
      };
      uint32_t dword01;
   };
   union {
      struct {
         uint32_t cb_id : 20;     /* see union rgp_sqtt_marker_cb_id */
         uint32_t vertex_offset_reg_idx : 4;
         uint32_t instance_offset_reg_idx : 4;
         uint32_t draw_index_reg_idx : 4;
      };
      uint32_t dword02;
   };
   union {
      uint32_t cmd_id;
      uint32_t dword03;
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_event) == 12,
              "rgp_sqtt_marker_event doesn't match RGP spec");

/**
 * Per-dispatch specific marker where workgroup dims are included.
 *
 * The plain event marker followed by three extra dwords (thread_x/y/z).
 */
struct rgp_sqtt_marker_event_with_dims {
   struct rgp_sqtt_marker_event event;
   uint32_t thread_x;
   uint32_t thread_y;
   uint32_t thread_z;
};

static_assert(sizeof(struct rgp_sqtt_marker_event_with_dims) == 24,
              "rgp_sqtt_marker_event_with_dims doesn't match RGP spec");
381 
/**
 * "Barrier Start" RGP SQTT instrumentation marker (Table 5)
 *
 * Two dwords, each accessible through its named fields or the raw dwordNN
 * alias.
 */
struct rgp_sqtt_marker_barrier_start {
   union {
      struct {
         uint32_t identifier : 4; /* enum rgp_sqtt_marker_identifier */
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;     /* see union rgp_sqtt_marker_cb_id */
         uint32_t reserved : 5;
      };
      uint32_t dword01;
   };
   union {
      struct {
         uint32_t driver_reason : 31;
         uint32_t internal : 1;
      };
      uint32_t dword02;
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_barrier_start) == 8,
              "rgp_sqtt_marker_barrier_start doesn't match RGP spec");
406 
/**
 * "Barrier End" RGP SQTT instrumentation marker (Table 6)
 *
 * Two dwords. Apart from the usual header fields and the 16-bit
 * num_layout_transitions counter, every field is a single-bit flag.
 */
struct rgp_sqtt_marker_barrier_end {
   union {
      struct {
         uint32_t identifier : 4; /* enum rgp_sqtt_marker_identifier */
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;     /* see union rgp_sqtt_marker_cb_id */
         uint32_t wait_on_eop_ts : 1;
         uint32_t vs_partial_flush : 1;
         uint32_t ps_partial_flush : 1;
         uint32_t cs_partial_flush : 1;
         uint32_t pfp_sync_me : 1;
      };
      uint32_t dword01;
   };
   union {
      struct {
         uint32_t sync_cp_dma : 1;
         uint32_t inval_tcp : 1;
         uint32_t inval_sqI : 1;
         uint32_t inval_sqK : 1;
         uint32_t flush_tcc : 1;
         uint32_t inval_tcc : 1;
         uint32_t flush_cb : 1;
         uint32_t inval_cb : 1;
         uint32_t flush_db : 1;
         uint32_t inval_db : 1;
         uint32_t num_layout_transitions : 16;
         uint32_t inval_gl1 : 1;
         uint32_t wait_on_ts : 1;
         uint32_t eop_ts_bottom_of_pipe : 1;
         uint32_t eos_ts_ps_done : 1;
         uint32_t eos_ts_cs_done : 1;
         uint32_t reserved : 1;
      };
      uint32_t dword02;
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_barrier_end) == 8,
              "rgp_sqtt_marker_barrier_end doesn't match RGP spec");
450 
/**
 * "Layout Transition" RGP SQTT instrumentation marker (Table 7)
 *
 * Two dwords. The first carries the header plus eight single-bit flags, one
 * per kind of transition operation; the second is entirely reserved.
 */
struct rgp_sqtt_marker_layout_transition {
   union {
      struct {
         uint32_t identifier : 4; /* enum rgp_sqtt_marker_identifier */
         uint32_t ext_dwords : 3;
         uint32_t depth_stencil_expand : 1;
         uint32_t htile_hiz_range_expand : 1;
         uint32_t depth_stencil_resummarize : 1;
         uint32_t dcc_decompress : 1;
         uint32_t fmask_decompress : 1;
         uint32_t fast_clear_eliminate : 1;
         uint32_t fmask_color_expand : 1;
         uint32_t init_mask_ram : 1;
         uint32_t reserved1 : 17;
      };
      uint32_t dword01;
   };
   union {
      struct {
         uint32_t reserved2 : 32;
      };
      uint32_t dword02;
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_layout_transition) == 8,
              "rgp_sqtt_marker_layout_transition doesn't match RGP spec");
481 
482 
/**
 * "User Event" RGP SQTT instrumentation marker (Table 8)
 *
 * A single dword. Unlike the other markers in this header, it has no
 * ext_dwords field after the identifier.
 */
struct rgp_sqtt_marker_user_event {
   union {
      struct {
         uint32_t identifier : 4; /* enum rgp_sqtt_marker_identifier */
         uint32_t reserved0 : 8;
         uint32_t data_type : 8;  /* enum rgp_sqtt_marker_user_event_type */
         uint32_t reserved1 : 12;
      };
      uint32_t dword01;
   };
};

/**
 * User event marker followed by one extra `length` dword.
 * NOTE(review): presumably the length of a payload that follows the marker
 * -- confirm the unit with the callers.
 */
struct rgp_sqtt_marker_user_event_with_length {
   struct rgp_sqtt_marker_user_event user_event;
   uint32_t length;
};

static_assert(sizeof(struct rgp_sqtt_marker_user_event) == 4,
              "rgp_sqtt_marker_user_event doesn't match RGP spec");
static_assert(sizeof(struct rgp_sqtt_marker_user_event_with_length) == 8,
              "rgp_sqtt_marker_user_event_with_length doesn't match RGP spec");
504 
/**
 * Kinds of user event.
 * NOTE(review): presumably the value stored in
 * rgp_sqtt_marker_user_event::data_type -- confirm with the callers.
 */
enum rgp_sqtt_marker_user_event_type
{
   UserEventTrigger = 0,
   UserEventPop = 1,
   UserEventPush = 2,
   UserEventObjectName = 3,
};
512 
/**
 * "Pipeline bind" RGP SQTT instrumentation marker (Table 12)
 *
 * Three dwords: a header dword followed by a 64-bit API PSO hash, which is
 * accessible either as api_pso_hash[0..1] or as dword02/dword03.
 */
struct rgp_sqtt_marker_pipeline_bind {
   union {
      struct {
         uint32_t identifier : 4; /* enum rgp_sqtt_marker_identifier */
         uint32_t ext_dwords : 3;
         uint32_t bind_point : 1;
         uint32_t cb_id : 20;     /* see union rgp_sqtt_marker_cb_id */
         uint32_t reserved : 4;
      };
      uint32_t dword01;
   };
   union {
      uint32_t api_pso_hash[2];
      struct {
         uint32_t dword02;
         uint32_t dword03;
      };
   };
};

static_assert(sizeof(struct rgp_sqtt_marker_pipeline_bind) == 12,
              "rgp_sqtt_marker_pipeline_bind doesn't match RGP spec");
538 
/*
 * Recording helpers: each takes the SQTT state plus a pair of 64-bit values.
 * NOTE(review): presumably they append to the matching rgp_* record in
 * struct ac_sqtt and return false on failure -- confirm in the implementation.
 */
bool ac_sqtt_add_pso_correlation(struct ac_sqtt *sqtt, uint64_t pipeline_hash, uint64_t api_hash);

bool ac_sqtt_add_code_object_loader_event(struct ac_sqtt *sqtt, uint64_t pipeline_hash,
                                          uint64_t base_address);

bool ac_sqtt_add_clock_calibration(struct ac_sqtt *sqtt, uint64_t cpu_timestamp,
                                   uint64_t gpu_timestamp);

bool ac_check_profile_state(const struct radeon_info *info);

/* Return the next command-buffer ID for the given IP type. */
union rgp_sqtt_marker_cb_id ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt *sqtt,
                                                       enum amd_ip_type ip_type);

bool ac_sqtt_se_is_disabled(const struct radeon_info *info, unsigned se);

/* Fill `sqtt_trace` from the state in `sqtt`; the bool result presumably
 * signals success (TODO confirm).
 */
bool ac_sqtt_get_trace(struct ac_sqtt *sqtt, const struct radeon_info *info,
                       struct ac_sqtt_trace *sqtt_trace);

uint32_t ac_sqtt_get_shader_mask(const struct radeon_info *info);

uint32_t ac_sqtt_get_active_cu(const struct radeon_info *info, unsigned se);
560 
561 #endif
562