1 /* 2 * Copyright 2020 Advanced Micro Devices, Inc. 3 * Copyright 2020 Valve Corporation 4 * 5 * SPDX-License-Identifier: MIT 6 */ 7 8 #ifndef AC_SQTT_H 9 #define AC_SQTT_H 10 11 #include <stdint.h> 12 #include <stdbool.h> 13 14 #include <assert.h> 15 #include "ac_pm4.h" 16 #include "ac_rgp.h" 17 #include "amd_family.h" 18 19 struct radeon_cmdbuf; 20 struct radeon_info; 21 22 /** 23 * SQ Thread tracing is a tracing mechanism that allows taking a detailed look 24 * at what the shader cores are doing. 25 * 26 * Among the things recorded are: 27 * - draws/dispatches + state 28 * - when each wave starts and stops. 29 * - for one SIMD per SE all instructions executed on that SIMD. 30 * 31 * The hardware stores all these as events in a buffer, no manual barrier 32 * around each command needed. The primary user of this is RGP. 33 */ 34 struct ac_sqtt { 35 struct radeon_cmdbuf *start_cs[2]; 36 struct radeon_cmdbuf *stop_cs[2]; 37 /* struct radeon_winsys_bo or struct pb_buffer */ 38 void *bo; 39 uint64_t buffer_va; 40 void *ptr; 41 uint32_t buffer_size; 42 int start_frame; 43 char *trigger_file; 44 bool instruction_timing_enabled; 45 46 uint32_t cmdbuf_ids_per_queue[AMD_NUM_IP_TYPES]; 47 48 struct rgp_code_object rgp_code_object; 49 struct rgp_loader_events rgp_loader_events; 50 struct rgp_pso_correlation rgp_pso_correlation; 51 52 struct rgp_queue_info rgp_queue_info; 53 struct rgp_queue_event rgp_queue_event; 54 55 struct rgp_clock_calibration rgp_clock_calibration; 56 57 struct hash_table_u64 *pipeline_bos; 58 }; 59 60 struct ac_sqtt_data_info { 61 uint32_t cur_offset; 62 uint32_t trace_status; 63 union { 64 uint32_t gfx9_write_counter; 65 uint32_t gfx10_dropped_cntr; 66 }; 67 }; 68 69 struct ac_sqtt_data_se { 70 struct ac_sqtt_data_info info; 71 void *data_ptr; 72 uint32_t shader_engine; 73 uint32_t compute_unit; 74 }; 75 76 #define SQTT_MAX_TRACES 6 77 78 struct ac_sqtt_trace { 79 const struct rgp_code_object *rgp_code_object; 80 const struct rgp_loader_events *rgp_loader_events; 81 const struct rgp_pso_correlation *rgp_pso_correlation; 82 const struct rgp_queue_info *rgp_queue_info; 83 const struct rgp_queue_event *rgp_queue_event; 84 const struct rgp_clock_calibration *rgp_clock_calibration; 85 86 uint32_t num_traces; 87 struct ac_sqtt_data_se traces[SQTT_MAX_TRACES]; 88 }; 89 90 uint64_t ac_sqtt_get_info_offset(unsigned se); 91 92 uint64_t ac_sqtt_get_data_offset(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt, 93 unsigned se); 94 95 void ac_sqtt_init(struct ac_sqtt *data); 96 97 void ac_sqtt_finish(struct ac_sqtt *data); 98 99 bool ac_is_sqtt_complete(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt, 100 const struct ac_sqtt_data_info *info); 101 102 /** 103 * Identifiers for RGP SQ thread-tracing markers (Table 1) 104 */ 105 enum rgp_sqtt_marker_identifier 106 { 107 RGP_SQTT_MARKER_IDENTIFIER_EVENT = 0x0, 108 RGP_SQTT_MARKER_IDENTIFIER_CB_START = 0x1, 109 RGP_SQTT_MARKER_IDENTIFIER_CB_END = 0x2, 110 RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START = 0x3, 111 RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END = 0x4, 112 RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT = 0x5, 113 RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API = 0x6, 114 RGP_SQTT_MARKER_IDENTIFIER_SYNC = 0x7, 115 RGP_SQTT_MARKER_IDENTIFIER_PRESENT = 0x8, 116 RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION = 0x9, 117 RGP_SQTT_MARKER_IDENTIFIER_RENDER_PASS = 0xA, 118 RGP_SQTT_MARKER_IDENTIFIER_RESERVED2 = 0xB, 119 RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE = 0xC, 120 RGP_SQTT_MARKER_IDENTIFIER_RESERVED4 = 0xD, 121 RGP_SQTT_MARKER_IDENTIFIER_RESERVED5 = 0xE, 122 RGP_SQTT_MARKER_IDENTIFIER_RESERVED6 = 0xF 123 }; 124 125 /** 126 * Command buffer IDs used in RGP SQ thread-tracing markers (only 20 bits). 127 */ 128 union rgp_sqtt_marker_cb_id { 129 struct { 130 uint32_t per_frame : 1; /* Must be 1, frame-based command buffer ID. */ 131 uint32_t frame_index : 7; 132 uint32_t cb_index : 12; /* Command buffer index within the frame. */ 133 uint32_t reserved : 12; 134 } per_frame_cb_id; 135 136 struct { 137 uint32_t per_frame : 1; /* Must be 0, global command buffer ID. */ 138 uint32_t cb_index : 19; /* Global command buffer index. */ 139 uint32_t reserved : 12; 140 } global_cb_id; 141 142 uint32_t all; 143 }; 144 145 /** 146 * RGP SQ thread-tracing marker for the start of a command buffer. (Table 2) 147 */ 148 struct rgp_sqtt_marker_cb_start { 149 union { 150 struct { 151 uint32_t identifier : 4; 152 uint32_t ext_dwords : 3; 153 uint32_t cb_id : 20; 154 uint32_t queue : 5; 155 }; 156 uint32_t dword01; 157 }; 158 union { 159 uint32_t device_id_low; 160 uint32_t dword02; 161 }; 162 union { 163 uint32_t device_id_high; 164 uint32_t dword03; 165 }; 166 union { 167 uint32_t queue_flags; 168 uint32_t dword04; 169 }; 170 }; 171 172 static_assert(sizeof(struct rgp_sqtt_marker_cb_start) == 16, 173 "rgp_sqtt_marker_cb_start doesn't match RGP spec"); 174 175 /** 176 * 177 * RGP SQ thread-tracing marker for the end of a command buffer. (Table 3) 178 */ 179 struct rgp_sqtt_marker_cb_end { 180 union { 181 struct { 182 uint32_t identifier : 4; 183 uint32_t ext_dwords : 3; 184 uint32_t cb_id : 20; 185 uint32_t reserved : 5; 186 }; 187 uint32_t dword01; 188 }; 189 union { 190 uint32_t device_id_low; 191 uint32_t dword02; 192 }; 193 union { 194 uint32_t device_id_high; 195 uint32_t dword03; 196 }; 197 }; 198 199 static_assert(sizeof(struct rgp_sqtt_marker_cb_end) == 12, 200 "rgp_sqtt_marker_cb_end doesn't match RGP spec"); 201 202 /** 203 * API types used in RGP SQ thread-tracing markers for the "General API" 204 * packet. 205 */ 206 enum rgp_sqtt_marker_general_api_type 207 { 208 ApiCmdBindPipeline = 0, 209 ApiCmdBindDescriptorSets = 1, 210 ApiCmdBindIndexBuffer = 2, 211 ApiCmdBindVertexBuffers = 3, 212 ApiCmdDraw = 4, 213 ApiCmdDrawIndexed = 5, 214 ApiCmdDrawIndirect = 6, 215 ApiCmdDrawIndexedIndirect = 7, 216 ApiCmdDrawIndirectCountAMD = 8, 217 ApiCmdDrawIndexedIndirectCountAMD = 9, 218 ApiCmdDispatch = 10, 219 ApiCmdDispatchIndirect = 11, 220 ApiCmdCopyBuffer = 12, 221 ApiCmdCopyImage = 13, 222 ApiCmdBlitImage = 14, 223 ApiCmdCopyBufferToImage = 15, 224 ApiCmdCopyImageToBuffer = 16, 225 ApiCmdUpdateBuffer = 17, 226 ApiCmdFillBuffer = 18, 227 ApiCmdClearColorImage = 19, 228 ApiCmdClearDepthStencilImage = 20, 229 ApiCmdClearAttachments = 21, 230 ApiCmdResolveImage = 22, 231 ApiCmdWaitEvents = 23, 232 ApiCmdPipelineBarrier = 24, 233 ApiCmdBeginQuery = 25, 234 ApiCmdEndQuery = 26, 235 ApiCmdResetQueryPool = 27, 236 ApiCmdWriteTimestamp = 28, 237 ApiCmdCopyQueryPoolResults = 29, 238 ApiCmdPushConstants = 30, 239 ApiCmdBeginRenderPass = 31, 240 ApiCmdNextSubpass = 32, 241 ApiCmdEndRenderPass = 33, 242 ApiCmdExecuteCommands = 34, 243 ApiCmdSetViewport = 35, 244 ApiCmdSetScissor = 36, 245 ApiCmdSetLineWidth = 37, 246 ApiCmdSetDepthBias = 38, 247 ApiCmdSetBlendConstants = 39, 248 ApiCmdSetDepthBounds = 40, 249 ApiCmdSetStencilCompareMask = 41, 250 ApiCmdSetStencilWriteMask = 42, 251 ApiCmdSetStencilReference = 43, 252 ApiCmdDrawIndirectCount = 44, 253 ApiCmdDrawIndexedIndirectCount = 45, 254 /* gap */ 255 ApiCmdDrawMeshTasksEXT = 47, 256 ApiCmdDrawMeshTasksIndirectCountEXT = 48, 257 ApiCmdDrawMeshTasksIndirectEXT = 49, 258 259 ApiRayTracingSeparateCompiled = 0x800000, 260 ApiInvalid = 0xffffffff 261 }; 262 263 /** 264 * RGP SQ thread-tracing marker for a "General API" instrumentation packet. 265 */ 266 struct rgp_sqtt_marker_general_api { 267 union { 268 struct { 269 uint32_t identifier : 4; 270 uint32_t ext_dwords : 3; 271 uint32_t api_type : 20; 272 uint32_t is_end : 1; 273 uint32_t reserved : 4; 274 }; 275 uint32_t dword01; 276 }; 277 }; 278 279 static_assert(sizeof(struct rgp_sqtt_marker_general_api) == 4, 280 "rgp_sqtt_marker_general_api doesn't match RGP spec"); 281 282 /** 283 * API types used in RGP SQ thread-tracing markers (Table 16). 284 */ 285 enum rgp_sqtt_marker_event_type 286 { 287 EventCmdDraw = 0, 288 EventCmdDrawIndexed = 1, 289 EventCmdDrawIndirect = 2, 290 EventCmdDrawIndexedIndirect = 3, 291 EventCmdDrawIndirectCountAMD = 4, 292 EventCmdDrawIndexedIndirectCountAMD = 5, 293 EventCmdDispatch = 6, 294 EventCmdDispatchIndirect = 7, 295 EventCmdCopyBuffer = 8, 296 EventCmdCopyImage = 9, 297 EventCmdBlitImage = 10, 298 EventCmdCopyBufferToImage = 11, 299 EventCmdCopyImageToBuffer = 12, 300 EventCmdUpdateBuffer = 13, 301 EventCmdFillBuffer = 14, 302 EventCmdClearColorImage = 15, 303 EventCmdClearDepthStencilImage = 16, 304 EventCmdClearAttachments = 17, 305 EventCmdResolveImage = 18, 306 EventCmdWaitEvents = 19, 307 EventCmdPipelineBarrier = 20, 308 EventCmdResetQueryPool = 21, 309 EventCmdCopyQueryPoolResults = 22, 310 EventRenderPassColorClear = 23, 311 EventRenderPassDepthStencilClear = 24, 312 EventRenderPassResolve = 25, 313 EventInternalUnknown = 26, 314 EventCmdDrawIndirectCount = 27, 315 EventCmdDrawIndexedIndirectCount = 28, 316 /* gap */ 317 EventCmdTraceRaysKHR = 30, 318 EventCmdTraceRaysIndirectKHR = 31, 319 EventCmdBuildAccelerationStructuresKHR = 32, 320 EventCmdBuildAccelerationStructuresIndirectKHR = 33, 321 EventCmdCopyAccelerationStructureKHR = 34, 322 EventCmdCopyAccelerationStructureToMemoryKHR = 35, 323 EventCmdCopyMemoryToAccelerationStructureKHR = 36, 324 /* gap */ 325 EventCmdDrawMeshTasksEXT = 41, 326 EventCmdDrawMeshTasksIndirectCountEXT = 42, 327 EventCmdDrawMeshTasksIndirectEXT = 43, 328 EventUnknown = 0x7fff, 329 EventInvalid = 0xffffffff 330 }; 331 332 /** 333 * "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker. (Table 4) 334 */ 335 struct rgp_sqtt_marker_event { 336 union { 337 struct { 338 uint32_t identifier : 4; 339 uint32_t ext_dwords : 3; 340 uint32_t api_type : 24; 341 uint32_t has_thread_dims : 1; 342 }; 343 uint32_t dword01; 344 }; 345 union { 346 struct { 347 uint32_t cb_id : 20; 348 uint32_t vertex_offset_reg_idx : 4; 349 uint32_t instance_offset_reg_idx : 4; 350 uint32_t draw_index_reg_idx : 4; 351 }; 352 uint32_t dword02; 353 }; 354 union { 355 uint32_t cmd_id; 356 uint32_t dword03; 357 }; 358 }; 359 360 static_assert(sizeof(struct rgp_sqtt_marker_event) == 12, 361 "rgp_sqtt_marker_event doesn't match RGP spec"); 362 363 /** 364 * Per-dispatch specific marker where workgroup dims are included. 365 */ 366 struct rgp_sqtt_marker_event_with_dims { 367 struct rgp_sqtt_marker_event event; 368 uint32_t thread_x; 369 uint32_t thread_y; 370 uint32_t thread_z; 371 }; 372 373 static_assert(sizeof(struct rgp_sqtt_marker_event_with_dims) == 24, 374 "rgp_sqtt_marker_event_with_dims doesn't match RGP spec"); 375 376 /** 377 * "Barrier Start" RGP SQTT instrumentation marker (Table 5) 378 */ 379 struct rgp_sqtt_marker_barrier_start { 380 union { 381 struct { 382 uint32_t identifier : 4; 383 uint32_t ext_dwords : 3; 384 uint32_t cb_id : 20; 385 uint32_t reserved : 5; 386 }; 387 uint32_t dword01; 388 }; 389 union { 390 struct { 391 uint32_t driver_reason : 31; 392 uint32_t internal : 1; 393 }; 394 uint32_t dword02; 395 }; 396 }; 397 398 static_assert(sizeof(struct rgp_sqtt_marker_barrier_start) == 8, 399 "rgp_sqtt_marker_barrier_start doesn't match RGP spec"); 400 401 /** 402 * "Barrier End" RGP SQTT instrumentation marker (Table 6) 403 */ 404 struct rgp_sqtt_marker_barrier_end { 405 union { 406 struct { 407 uint32_t identifier : 4; 408 uint32_t ext_dwords : 3; 409 uint32_t cb_id : 20; 410 uint32_t wait_on_eop_ts : 1; 411 uint32_t vs_partial_flush : 1; 412 uint32_t ps_partial_flush : 1; 413 uint32_t cs_partial_flush : 1; 414 uint32_t pfp_sync_me : 1; 415 }; 416 uint32_t dword01; 417 }; 418 union { 419 struct { 420 uint32_t sync_cp_dma : 1; 421 uint32_t inval_tcp : 1; 422 uint32_t inval_sqI : 1; 423 uint32_t inval_sqK : 1; 424 uint32_t flush_tcc : 1; 425 uint32_t inval_tcc : 1; 426 uint32_t flush_cb : 1; 427 uint32_t inval_cb : 1; 428 uint32_t flush_db : 1; 429 uint32_t inval_db : 1; 430 uint32_t num_layout_transitions : 16; 431 uint32_t inval_gl1 : 1; 432 uint32_t wait_on_ts : 1; 433 uint32_t eop_ts_bottom_of_pipe : 1; 434 uint32_t eos_ts_ps_done : 1; 435 uint32_t eos_ts_cs_done : 1; 436 uint32_t reserved : 1; 437 }; 438 uint32_t dword02; 439 }; 440 }; 441 442 static_assert(sizeof(struct rgp_sqtt_marker_barrier_end) == 8, 443 "rgp_sqtt_marker_barrier_end doesn't match RGP spec"); 444 445 /** 446 * "Layout Transition" RGP SQTT instrumentation marker (Table 7) 447 */ 448 struct rgp_sqtt_marker_layout_transition { 449 union { 450 struct { 451 uint32_t identifier : 4; 452 uint32_t ext_dwords : 3; 453 uint32_t depth_stencil_expand : 1; 454 uint32_t htile_hiz_range_expand : 1; 455 uint32_t depth_stencil_resummarize : 1; 456 uint32_t dcc_decompress : 1; 457 uint32_t fmask_decompress : 1; 458 uint32_t fast_clear_eliminate : 1; 459 uint32_t fmask_color_expand : 1; 460 uint32_t init_mask_ram : 1; 461 uint32_t reserved1 : 17; 462 }; 463 uint32_t dword01; 464 }; 465 union { 466 struct { 467 uint32_t reserved2 : 32; 468 }; 469 uint32_t dword02; 470 }; 471 }; 472 473 static_assert(sizeof(struct rgp_sqtt_marker_layout_transition) == 8, 474 "rgp_sqtt_marker_layout_transition doesn't match RGP spec"); 475 476 477 /** 478 * "User Event" RGP SQTT instrumentation marker (Table 8) 479 */ 480 struct rgp_sqtt_marker_user_event { 481 union { 482 struct { 483 uint32_t identifier : 4; 484 uint32_t reserved0 : 8; 485 uint32_t data_type : 8; 486 uint32_t reserved1 : 12; 487 }; 488 uint32_t dword01; 489 }; 490 }; 491 struct rgp_sqtt_marker_user_event_with_length { 492 struct rgp_sqtt_marker_user_event user_event; 493 uint32_t length; 494 }; 495 496 static_assert(sizeof(struct rgp_sqtt_marker_user_event) == 4, 497 "rgp_sqtt_marker_user_event doesn't match RGP spec"); 498 499 enum rgp_sqtt_marker_user_event_type 500 { 501 UserEventTrigger = 0, 502 UserEventPop, 503 UserEventPush, 504 UserEventObjectName, 505 }; 506 507 /** 508 * "Pipeline bind" RGP SQTT instrumentation marker (Table 12) 509 */ 510 struct rgp_sqtt_marker_pipeline_bind { 511 union { 512 struct { 513 uint32_t identifier : 4; 514 uint32_t ext_dwords : 3; 515 uint32_t bind_point : 1; 516 uint32_t cb_id : 20; 517 uint32_t reserved : 4; 518 }; 519 uint32_t dword01; 520 }; 521 union { 522 uint32_t api_pso_hash[2]; 523 struct { 524 uint32_t dword02; 525 uint32_t dword03; 526 }; 527 }; 528 }; 529 530 static_assert(sizeof(struct rgp_sqtt_marker_pipeline_bind) == 12, 531 "rgp_sqtt_marker_pipeline_bind doesn't match RGP spec"); 532 533 bool ac_sqtt_add_pso_correlation(struct ac_sqtt *sqtt, uint64_t pipeline_hash, uint64_t api_hash); 534 535 bool ac_sqtt_add_code_object_loader_event(struct ac_sqtt *sqtt, uint64_t pipeline_hash, 536 uint64_t base_address); 537 538 bool ac_sqtt_add_clock_calibration(struct ac_sqtt *sqtt, uint64_t cpu_timestamp, 539 uint64_t gpu_timestamp); 540 541 bool ac_check_profile_state(const struct radeon_info *info); 542 543 union rgp_sqtt_marker_cb_id ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt *sqtt, 544 enum amd_ip_type ip_type); 545 546 uint32_t ac_sqtt_get_buffer_align_shift(const struct radeon_info *info); 547 548 bool ac_sqtt_get_trace(struct ac_sqtt *sqtt, const struct radeon_info *info, 549 struct ac_sqtt_trace *sqtt_trace); 550 551 uint32_t ac_sqtt_get_ctrl(const struct radeon_info *info, bool enable); 552 553 uint32_t ac_sqtt_get_shader_mask(const struct radeon_info *info); 554 555 void ac_sqtt_emit_start(const struct radeon_info *info, struct ac_pm4_state *pm4, 556 const struct ac_sqtt *sqtt, bool is_compute_queue); 557 558 void ac_sqtt_emit_stop(const struct radeon_info *info, struct ac_pm4_state *pm4, 559 bool is_compute_queue); 560 561 void ac_sqtt_emit_wait(const struct radeon_info *info, struct ac_pm4_state *pm4, 562 const struct ac_sqtt *sqtt, bool is_compute_queue); 563 564 #endif 565