1 /* 2 * Copyright 2020 Advanced Micro Devices, Inc. 3 * Copyright 2020 Valve Corporation 4 * 5 * SPDX-License-Identifier: MIT 6 */ 7 8 #ifndef AC_SQTT_H 9 #define AC_SQTT_H 10 11 #include <stdint.h> 12 #include <stdbool.h> 13 14 #include <assert.h> 15 #include "ac_pm4.h" 16 #include "ac_rgp.h" 17 #include "amd_family.h" 18 19 struct radeon_cmdbuf; 20 struct radeon_info; 21 22 /** 23 * SQ Thread tracing is a tracing mechanism that allows taking a detailed look 24 * at what the shader cores are doing. 25 * 26 * Among the things recorded are: 27 * - draws/dispatches + state 28 * - when each wave starts and stops. 29 * - for one SIMD per SE all instructions executed on that SIMD. 30 * 31 * The hardware stores all these as events in a buffer, no manual barrier 32 * around each command needed. The primary user of this is RGP. 33 */ 34 struct ac_sqtt { 35 struct radeon_cmdbuf *start_cs[2]; 36 struct radeon_cmdbuf *stop_cs[2]; 37 /* struct radeon_winsys_bo or struct pb_buffer */ 38 void *bo; 39 uint64_t buffer_va; 40 void *ptr; 41 uint32_t buffer_size; 42 int start_frame; 43 char *trigger_file; 44 bool instruction_timing_enabled; 45 46 uint32_t cmdbuf_ids_per_queue[AMD_NUM_IP_TYPES]; 47 48 struct rgp_code_object rgp_code_object; 49 struct rgp_loader_events rgp_loader_events; 50 struct rgp_pso_correlation rgp_pso_correlation; 51 52 struct rgp_queue_info rgp_queue_info; 53 struct rgp_queue_event rgp_queue_event; 54 55 struct rgp_clock_calibration rgp_clock_calibration; 56 57 struct hash_table_u64 *pipeline_bos; 58 }; 59 60 #define SQTT_BUFFER_ALIGN_SHIFT 12 61 62 struct ac_sqtt_data_info { 63 uint32_t cur_offset; 64 uint32_t trace_status; 65 union { 66 uint32_t gfx9_write_counter; 67 uint32_t gfx10_dropped_cntr; 68 }; 69 }; 70 71 struct ac_sqtt_data_se { 72 struct ac_sqtt_data_info info; 73 void *data_ptr; 74 uint32_t shader_engine; 75 uint32_t compute_unit; 76 }; 77 78 #define SQTT_MAX_TRACES 6 79 80 struct ac_sqtt_trace { 81 const struct rgp_code_object *rgp_code_object; 82 const struct rgp_loader_events *rgp_loader_events; 83 const struct rgp_pso_correlation *rgp_pso_correlation; 84 const struct rgp_queue_info *rgp_queue_info; 85 const struct rgp_queue_event *rgp_queue_event; 86 const struct rgp_clock_calibration *rgp_clock_calibration; 87 88 uint32_t num_traces; 89 struct ac_sqtt_data_se traces[SQTT_MAX_TRACES]; 90 }; 91 92 uint64_t ac_sqtt_get_info_offset(unsigned se); 93 94 uint64_t ac_sqtt_get_data_offset(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt, 95 unsigned se); 96 97 void ac_sqtt_init(struct ac_sqtt *data); 98 99 void ac_sqtt_finish(struct ac_sqtt *data); 100 101 bool ac_is_sqtt_complete(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt, 102 const struct ac_sqtt_data_info *info); 103 104 /** 105 * Identifiers for RGP SQ thread-tracing markers (Table 1) 106 */ 107 enum rgp_sqtt_marker_identifier 108 { 109 RGP_SQTT_MARKER_IDENTIFIER_EVENT = 0x0, 110 RGP_SQTT_MARKER_IDENTIFIER_CB_START = 0x1, 111 RGP_SQTT_MARKER_IDENTIFIER_CB_END = 0x2, 112 RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START = 0x3, 113 RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END = 0x4, 114 RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT = 0x5, 115 RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API = 0x6, 116 RGP_SQTT_MARKER_IDENTIFIER_SYNC = 0x7, 117 RGP_SQTT_MARKER_IDENTIFIER_PRESENT = 0x8, 118 RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION = 0x9, 119 RGP_SQTT_MARKER_IDENTIFIER_RENDER_PASS = 0xA, 120 RGP_SQTT_MARKER_IDENTIFIER_RESERVED2 = 0xB, 121 RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE = 0xC, 122 RGP_SQTT_MARKER_IDENTIFIER_RESERVED4 = 0xD, 123 RGP_SQTT_MARKER_IDENTIFIER_RESERVED5 = 0xE, 124 RGP_SQTT_MARKER_IDENTIFIER_RESERVED6 = 0xF 125 }; 126 127 /** 128 * Command buffer IDs used in RGP SQ thread-tracing markers (only 20 bits). 129 */ 130 union rgp_sqtt_marker_cb_id { 131 struct { 132 uint32_t per_frame : 1; /* Must be 1, frame-based command buffer ID. */ 133 uint32_t frame_index : 7; 134 uint32_t cb_index : 12; /* Command buffer index within the frame. */ 135 uint32_t reserved : 12; 136 } per_frame_cb_id; 137 138 struct { 139 uint32_t per_frame : 1; /* Must be 0, global command buffer ID. */ 140 uint32_t cb_index : 19; /* Global command buffer index. */ 141 uint32_t reserved : 12; 142 } global_cb_id; 143 144 uint32_t all; 145 }; 146 147 /** 148 * RGP SQ thread-tracing marker for the start of a command buffer. (Table 2) 149 */ 150 struct rgp_sqtt_marker_cb_start { 151 union { 152 struct { 153 uint32_t identifier : 4; 154 uint32_t ext_dwords : 3; 155 uint32_t cb_id : 20; 156 uint32_t queue : 5; 157 }; 158 uint32_t dword01; 159 }; 160 union { 161 uint32_t device_id_low; 162 uint32_t dword02; 163 }; 164 union { 165 uint32_t device_id_high; 166 uint32_t dword03; 167 }; 168 union { 169 uint32_t queue_flags; 170 uint32_t dword04; 171 }; 172 }; 173 174 static_assert(sizeof(struct rgp_sqtt_marker_cb_start) == 16, 175 "rgp_sqtt_marker_cb_start doesn't match RGP spec"); 176 177 /** 178 * 179 * RGP SQ thread-tracing marker for the end of a command buffer. (Table 3) 180 */ 181 struct rgp_sqtt_marker_cb_end { 182 union { 183 struct { 184 uint32_t identifier : 4; 185 uint32_t ext_dwords : 3; 186 uint32_t cb_id : 20; 187 uint32_t reserved : 5; 188 }; 189 uint32_t dword01; 190 }; 191 union { 192 uint32_t device_id_low; 193 uint32_t dword02; 194 }; 195 union { 196 uint32_t device_id_high; 197 uint32_t dword03; 198 }; 199 }; 200 201 static_assert(sizeof(struct rgp_sqtt_marker_cb_end) == 12, 202 "rgp_sqtt_marker_cb_end doesn't match RGP spec"); 203 204 /** 205 * API types used in RGP SQ thread-tracing markers for the "General API" 206 * packet. 207 */ 208 enum rgp_sqtt_marker_general_api_type 209 { 210 ApiCmdBindPipeline = 0, 211 ApiCmdBindDescriptorSets = 1, 212 ApiCmdBindIndexBuffer = 2, 213 ApiCmdBindVertexBuffers = 3, 214 ApiCmdDraw = 4, 215 ApiCmdDrawIndexed = 5, 216 ApiCmdDrawIndirect = 6, 217 ApiCmdDrawIndexedIndirect = 7, 218 ApiCmdDrawIndirectCountAMD = 8, 219 ApiCmdDrawIndexedIndirectCountAMD = 9, 220 ApiCmdDispatch = 10, 221 ApiCmdDispatchIndirect = 11, 222 ApiCmdCopyBuffer = 12, 223 ApiCmdCopyImage = 13, 224 ApiCmdBlitImage = 14, 225 ApiCmdCopyBufferToImage = 15, 226 ApiCmdCopyImageToBuffer = 16, 227 ApiCmdUpdateBuffer = 17, 228 ApiCmdFillBuffer = 18, 229 ApiCmdClearColorImage = 19, 230 ApiCmdClearDepthStencilImage = 20, 231 ApiCmdClearAttachments = 21, 232 ApiCmdResolveImage = 22, 233 ApiCmdWaitEvents = 23, 234 ApiCmdPipelineBarrier = 24, 235 ApiCmdBeginQuery = 25, 236 ApiCmdEndQuery = 26, 237 ApiCmdResetQueryPool = 27, 238 ApiCmdWriteTimestamp = 28, 239 ApiCmdCopyQueryPoolResults = 29, 240 ApiCmdPushConstants = 30, 241 ApiCmdBeginRenderPass = 31, 242 ApiCmdNextSubpass = 32, 243 ApiCmdEndRenderPass = 33, 244 ApiCmdExecuteCommands = 34, 245 ApiCmdSetViewport = 35, 246 ApiCmdSetScissor = 36, 247 ApiCmdSetLineWidth = 37, 248 ApiCmdSetDepthBias = 38, 249 ApiCmdSetBlendConstants = 39, 250 ApiCmdSetDepthBounds = 40, 251 ApiCmdSetStencilCompareMask = 41, 252 ApiCmdSetStencilWriteMask = 42, 253 ApiCmdSetStencilReference = 43, 254 ApiCmdDrawIndirectCount = 44, 255 ApiCmdDrawIndexedIndirectCount = 45, 256 /* gap */ 257 ApiCmdDrawMeshTasksEXT = 47, 258 ApiCmdDrawMeshTasksIndirectCountEXT = 48, 259 ApiCmdDrawMeshTasksIndirectEXT = 49, 260 261 ApiRayTracingSeparateCompiled = 0x800000, 262 ApiInvalid = 0xffffffff 263 }; 264 265 /** 266 * RGP SQ thread-tracing marker for a "General API" instrumentation packet. 267 */ 268 struct rgp_sqtt_marker_general_api { 269 union { 270 struct { 271 uint32_t identifier : 4; 272 uint32_t ext_dwords : 3; 273 uint32_t api_type : 20; 274 uint32_t is_end : 1; 275 uint32_t reserved : 4; 276 }; 277 uint32_t dword01; 278 }; 279 }; 280 281 static_assert(sizeof(struct rgp_sqtt_marker_general_api) == 4, 282 "rgp_sqtt_marker_general_api doesn't match RGP spec"); 283 284 /** 285 * API types used in RGP SQ thread-tracing markers (Table 16). 286 */ 287 enum rgp_sqtt_marker_event_type 288 { 289 EventCmdDraw = 0, 290 EventCmdDrawIndexed = 1, 291 EventCmdDrawIndirect = 2, 292 EventCmdDrawIndexedIndirect = 3, 293 EventCmdDrawIndirectCountAMD = 4, 294 EventCmdDrawIndexedIndirectCountAMD = 5, 295 EventCmdDispatch = 6, 296 EventCmdDispatchIndirect = 7, 297 EventCmdCopyBuffer = 8, 298 EventCmdCopyImage = 9, 299 EventCmdBlitImage = 10, 300 EventCmdCopyBufferToImage = 11, 301 EventCmdCopyImageToBuffer = 12, 302 EventCmdUpdateBuffer = 13, 303 EventCmdFillBuffer = 14, 304 EventCmdClearColorImage = 15, 305 EventCmdClearDepthStencilImage = 16, 306 EventCmdClearAttachments = 17, 307 EventCmdResolveImage = 18, 308 EventCmdWaitEvents = 19, 309 EventCmdPipelineBarrier = 20, 310 EventCmdResetQueryPool = 21, 311 EventCmdCopyQueryPoolResults = 22, 312 EventRenderPassColorClear = 23, 313 EventRenderPassDepthStencilClear = 24, 314 EventRenderPassResolve = 25, 315 EventInternalUnknown = 26, 316 EventCmdDrawIndirectCount = 27, 317 EventCmdDrawIndexedIndirectCount = 28, 318 /* gap */ 319 EventCmdTraceRaysKHR = 30, 320 EventCmdTraceRaysIndirectKHR = 31, 321 EventCmdBuildAccelerationStructuresKHR = 32, 322 EventCmdBuildAccelerationStructuresIndirectKHR = 33, 323 EventCmdCopyAccelerationStructureKHR = 34, 324 EventCmdCopyAccelerationStructureToMemoryKHR = 35, 325 EventCmdCopyMemoryToAccelerationStructureKHR = 36, 326 /* gap */ 327 EventCmdDrawMeshTasksEXT = 41, 328 EventCmdDrawMeshTasksIndirectCountEXT = 42, 329 EventCmdDrawMeshTasksIndirectEXT = 43, 330 EventUnknown = 0x7fff, 331 EventInvalid = 0xffffffff 332 }; 333 334 /** 335 * "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker. (Table 4) 336 */ 337 struct rgp_sqtt_marker_event { 338 union { 339 struct { 340 uint32_t identifier : 4; 341 uint32_t ext_dwords : 3; 342 uint32_t api_type : 24; 343 uint32_t has_thread_dims : 1; 344 }; 345 uint32_t dword01; 346 }; 347 union { 348 struct { 349 uint32_t cb_id : 20; 350 uint32_t vertex_offset_reg_idx : 4; 351 uint32_t instance_offset_reg_idx : 4; 352 uint32_t draw_index_reg_idx : 4; 353 }; 354 uint32_t dword02; 355 }; 356 union { 357 uint32_t cmd_id; 358 uint32_t dword03; 359 }; 360 }; 361 362 static_assert(sizeof(struct rgp_sqtt_marker_event) == 12, 363 "rgp_sqtt_marker_event doesn't match RGP spec"); 364 365 /** 366 * Per-dispatch specific marker where workgroup dims are included. 367 */ 368 struct rgp_sqtt_marker_event_with_dims { 369 struct rgp_sqtt_marker_event event; 370 uint32_t thread_x; 371 uint32_t thread_y; 372 uint32_t thread_z; 373 }; 374 375 static_assert(sizeof(struct rgp_sqtt_marker_event_with_dims) == 24, 376 "rgp_sqtt_marker_event_with_dims doesn't match RGP spec"); 377 378 /** 379 * "Barrier Start" RGP SQTT instrumentation marker (Table 5) 380 */ 381 struct rgp_sqtt_marker_barrier_start { 382 union { 383 struct { 384 uint32_t identifier : 4; 385 uint32_t ext_dwords : 3; 386 uint32_t cb_id : 20; 387 uint32_t reserved : 5; 388 }; 389 uint32_t dword01; 390 }; 391 union { 392 struct { 393 uint32_t driver_reason : 31; 394 uint32_t internal : 1; 395 }; 396 uint32_t dword02; 397 }; 398 }; 399 400 static_assert(sizeof(struct rgp_sqtt_marker_barrier_start) == 8, 401 "rgp_sqtt_marker_barrier_start doesn't match RGP spec"); 402 403 /** 404 * "Barrier End" RGP SQTT instrumentation marker (Table 6) 405 */ 406 struct rgp_sqtt_marker_barrier_end { 407 union { 408 struct { 409 uint32_t identifier : 4; 410 uint32_t ext_dwords : 3; 411 uint32_t cb_id : 20; 412 uint32_t wait_on_eop_ts : 1; 413 uint32_t vs_partial_flush : 1; 414 uint32_t ps_partial_flush : 1; 415 uint32_t cs_partial_flush : 1; 416 uint32_t pfp_sync_me : 1; 417 }; 418 uint32_t dword01; 419 }; 420 union { 421 struct { 422 uint32_t sync_cp_dma : 1; 423 uint32_t inval_tcp : 1; 424 uint32_t inval_sqI : 1; 425 uint32_t inval_sqK : 1; 426 uint32_t flush_tcc : 1; 427 uint32_t inval_tcc : 1; 428 uint32_t flush_cb : 1; 429 uint32_t inval_cb : 1; 430 uint32_t flush_db : 1; 431 uint32_t inval_db : 1; 432 uint32_t num_layout_transitions : 16; 433 uint32_t inval_gl1 : 1; 434 uint32_t wait_on_ts : 1; 435 uint32_t eop_ts_bottom_of_pipe : 1; 436 uint32_t eos_ts_ps_done : 1; 437 uint32_t eos_ts_cs_done : 1; 438 uint32_t reserved : 1; 439 }; 440 uint32_t dword02; 441 }; 442 }; 443 444 static_assert(sizeof(struct rgp_sqtt_marker_barrier_end) == 8, 445 "rgp_sqtt_marker_barrier_end doesn't match RGP spec"); 446 447 /** 448 * "Layout Transition" RGP SQTT instrumentation marker (Table 7) 449 */ 450 struct rgp_sqtt_marker_layout_transition { 451 union { 452 struct { 453 uint32_t identifier : 4; 454 uint32_t ext_dwords : 3; 455 uint32_t depth_stencil_expand : 1; 456 uint32_t htile_hiz_range_expand : 1; 457 uint32_t depth_stencil_resummarize : 1; 458 uint32_t dcc_decompress : 1; 459 uint32_t fmask_decompress : 1; 460 uint32_t fast_clear_eliminate : 1; 461 uint32_t fmask_color_expand : 1; 462 uint32_t init_mask_ram : 1; 463 uint32_t reserved1 : 17; 464 }; 465 uint32_t dword01; 466 }; 467 union { 468 struct { 469 uint32_t reserved2 : 32; 470 }; 471 uint32_t dword02; 472 }; 473 }; 474 475 static_assert(sizeof(struct rgp_sqtt_marker_layout_transition) == 8, 476 "rgp_sqtt_marker_layout_transition doesn't match RGP spec"); 477 478 479 /** 480 * "User Event" RGP SQTT instrumentation marker (Table 8) 481 */ 482 struct rgp_sqtt_marker_user_event { 483 union { 484 struct { 485 uint32_t identifier : 4; 486 uint32_t reserved0 : 8; 487 uint32_t data_type : 8; 488 uint32_t reserved1 : 12; 489 }; 490 uint32_t dword01; 491 }; 492 }; 493 struct rgp_sqtt_marker_user_event_with_length { 494 struct rgp_sqtt_marker_user_event user_event; 495 uint32_t length; 496 }; 497 498 static_assert(sizeof(struct rgp_sqtt_marker_user_event) == 4, 499 "rgp_sqtt_marker_user_event doesn't match RGP spec"); 500 501 enum rgp_sqtt_marker_user_event_type 502 { 503 UserEventTrigger = 0, 504 UserEventPop, 505 UserEventPush, 506 UserEventObjectName, 507 }; 508 509 /** 510 * "Pipeline bind" RGP SQTT instrumentation marker (Table 12) 511 */ 512 struct rgp_sqtt_marker_pipeline_bind { 513 union { 514 struct { 515 uint32_t identifier : 4; 516 uint32_t ext_dwords : 3; 517 uint32_t bind_point : 1; 518 uint32_t cb_id : 20; 519 uint32_t reserved : 4; 520 }; 521 uint32_t dword01; 522 }; 523 union { 524 uint32_t api_pso_hash[2]; 525 struct { 526 uint32_t dword02; 527 uint32_t dword03; 528 }; 529 }; 530 }; 531 532 static_assert(sizeof(struct rgp_sqtt_marker_pipeline_bind) == 12, 533 "rgp_sqtt_marker_pipeline_bind doesn't match RGP spec"); 534 535 bool ac_sqtt_add_pso_correlation(struct ac_sqtt *sqtt, uint64_t pipeline_hash, uint64_t api_hash); 536 537 bool ac_sqtt_add_code_object_loader_event(struct ac_sqtt *sqtt, uint64_t pipeline_hash, 538 uint64_t base_address); 539 540 bool ac_sqtt_add_clock_calibration(struct ac_sqtt *sqtt, uint64_t cpu_timestamp, 541 uint64_t gpu_timestamp); 542 543 bool ac_check_profile_state(const struct radeon_info *info); 544 545 union rgp_sqtt_marker_cb_id ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt *sqtt, 546 enum amd_ip_type ip_type); 547 548 bool ac_sqtt_get_trace(struct ac_sqtt *sqtt, const struct radeon_info *info, 549 struct ac_sqtt_trace *sqtt_trace); 550 551 uint32_t ac_sqtt_get_ctrl(const struct radeon_info *info, bool enable); 552 553 uint32_t ac_sqtt_get_shader_mask(const struct radeon_info *info); 554 555 void ac_sqtt_emit_start(const struct radeon_info *info, struct ac_pm4_state *pm4, 556 const struct ac_sqtt *sqtt, bool is_compute_queue); 557 558 void ac_sqtt_emit_stop(const struct radeon_info *info, struct ac_pm4_state *pm4, 559 bool is_compute_queue); 560 561 void ac_sqtt_emit_wait(const struct radeon_info *info, struct ac_pm4_state *pm4, 562 const struct ac_sqtt *sqtt, bool is_compute_queue); 563 564 #endif 565