1 /* 2 * Copyright 2020 Advanced Micro Devices, Inc. 3 * Copyright 2020 Valve Corporation 4 * 5 * SPDX-License-Identifier: MIT 6 */ 7 8 #ifndef AC_SQTT_H 9 #define AC_SQTT_H 10 11 #include <stdint.h> 12 #include <stdbool.h> 13 14 #include <assert.h> 15 #include "ac_rgp.h" 16 #include "amd_family.h" 17 18 struct radeon_cmdbuf; 19 struct radeon_info; 20 21 /** 22 * SQ Thread tracing is a tracing mechanism that allows taking a detailed look 23 * at what the shader cores are doing. 24 * 25 * Among the things recorded are: 26 * - draws/dispatches + state 27 * - when each wave starts and stops. 28 * - for one SIMD per SE all instructions executed on that SIMD. 29 * 30 * The hardware stores all these as events in a buffer, no manual barrier 31 * around each command needed. The primary user of this is RGP. 32 */ 33 struct ac_sqtt { 34 struct radeon_cmdbuf *start_cs[2]; 35 struct radeon_cmdbuf *stop_cs[2]; 36 /* struct radeon_winsys_bo or struct pb_buffer */ 37 void *bo; 38 void *ptr; 39 uint32_t buffer_size; 40 int start_frame; 41 char *trigger_file; 42 43 uint32_t cmdbuf_ids_per_queue[AMD_NUM_IP_TYPES]; 44 45 struct rgp_code_object rgp_code_object; 46 struct rgp_loader_events rgp_loader_events; 47 struct rgp_pso_correlation rgp_pso_correlation; 48 49 struct rgp_queue_info rgp_queue_info; 50 struct rgp_queue_event rgp_queue_event; 51 52 struct rgp_clock_calibration rgp_clock_calibration; 53 54 struct hash_table_u64 *pipeline_bos; 55 }; 56 57 #define SQTT_BUFFER_ALIGN_SHIFT 12 58 59 struct ac_sqtt_data_info { 60 uint32_t cur_offset; 61 uint32_t trace_status; 62 union { 63 uint32_t gfx9_write_counter; 64 uint32_t gfx10_dropped_cntr; 65 }; 66 }; 67 68 struct ac_sqtt_data_se { 69 struct ac_sqtt_data_info info; 70 void *data_ptr; 71 uint32_t shader_engine; 72 uint32_t compute_unit; 73 }; 74 75 #define SQTT_MAX_TRACES 6 76 77 struct ac_sqtt_trace { 78 const struct rgp_code_object *rgp_code_object; 79 const struct rgp_loader_events *rgp_loader_events; 80 const struct rgp_pso_correlation *rgp_pso_correlation; 81 const struct rgp_queue_info *rgp_queue_info; 82 const struct rgp_queue_event *rgp_queue_event; 83 const struct rgp_clock_calibration *rgp_clock_calibration; 84 85 uint32_t num_traces; 86 struct ac_sqtt_data_se traces[SQTT_MAX_TRACES]; 87 }; 88 89 uint64_t ac_sqtt_get_info_offset(unsigned se); 90 91 uint64_t ac_sqtt_get_data_offset(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt, 92 unsigned se); 93 uint64_t ac_sqtt_get_info_va(uint64_t va, unsigned se); 94 95 uint64_t ac_sqtt_get_data_va(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt, 96 uint64_t va, unsigned se); 97 98 void ac_sqtt_init(struct ac_sqtt *data); 99 100 void ac_sqtt_finish(struct ac_sqtt *data); 101 102 bool ac_is_sqtt_complete(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt, 103 const struct ac_sqtt_data_info *info); 104 105 uint32_t ac_get_expected_buffer_size(struct radeon_info *rad_info, 106 const struct ac_sqtt_data_info *info); 107 108 /** 109 * Identifiers for RGP SQ thread-tracing markers (Table 1) 110 */ 111 enum rgp_sqtt_marker_identifier 112 { 113 RGP_SQTT_MARKER_IDENTIFIER_EVENT = 0x0, 114 RGP_SQTT_MARKER_IDENTIFIER_CB_START = 0x1, 115 RGP_SQTT_MARKER_IDENTIFIER_CB_END = 0x2, 116 RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START = 0x3, 117 RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END = 0x4, 118 RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT = 0x5, 119 RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API = 0x6, 120 RGP_SQTT_MARKER_IDENTIFIER_SYNC = 0x7, 121 RGP_SQTT_MARKER_IDENTIFIER_PRESENT = 0x8, 122 RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION = 0x9, 123 RGP_SQTT_MARKER_IDENTIFIER_RENDER_PASS = 0xA, 124 RGP_SQTT_MARKER_IDENTIFIER_RESERVED2 = 0xB, 125 RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE = 0xC, 126 RGP_SQTT_MARKER_IDENTIFIER_RESERVED4 = 0xD, 127 RGP_SQTT_MARKER_IDENTIFIER_RESERVED5 = 0xE, 128 RGP_SQTT_MARKER_IDENTIFIER_RESERVED6 = 0xF 129 }; 130 131 /** 132 * Command buffer IDs used in RGP SQ thread-tracing markers (only 20 bits). 133 */ 134 union rgp_sqtt_marker_cb_id { 135 struct { 136 uint32_t per_frame : 1; /* Must be 1, frame-based command buffer ID. */ 137 uint32_t frame_index : 7; 138 uint32_t cb_index : 12; /* Command buffer index within the frame. */ 139 uint32_t reserved : 12; 140 } per_frame_cb_id; 141 142 struct { 143 uint32_t per_frame : 1; /* Must be 0, global command buffer ID. */ 144 uint32_t cb_index : 19; /* Global command buffer index. */ 145 uint32_t reserved : 12; 146 } global_cb_id; 147 148 uint32_t all; 149 }; 150 151 /** 152 * RGP SQ thread-tracing marker for the start of a command buffer. (Table 2) 153 */ 154 struct rgp_sqtt_marker_cb_start { 155 union { 156 struct { 157 uint32_t identifier : 4; 158 uint32_t ext_dwords : 3; 159 uint32_t cb_id : 20; 160 uint32_t queue : 5; 161 }; 162 uint32_t dword01; 163 }; 164 union { 165 uint32_t device_id_low; 166 uint32_t dword02; 167 }; 168 union { 169 uint32_t device_id_high; 170 uint32_t dword03; 171 }; 172 union { 173 uint32_t queue_flags; 174 uint32_t dword04; 175 }; 176 }; 177 178 static_assert(sizeof(struct rgp_sqtt_marker_cb_start) == 16, 179 "rgp_sqtt_marker_cb_start doesn't match RGP spec"); 180 181 /** 182 * 183 * RGP SQ thread-tracing marker for the end of a command buffer. (Table 3) 184 */ 185 struct rgp_sqtt_marker_cb_end { 186 union { 187 struct { 188 uint32_t identifier : 4; 189 uint32_t ext_dwords : 3; 190 uint32_t cb_id : 20; 191 uint32_t reserved : 5; 192 }; 193 uint32_t dword01; 194 }; 195 union { 196 uint32_t device_id_low; 197 uint32_t dword02; 198 }; 199 union { 200 uint32_t device_id_high; 201 uint32_t dword03; 202 }; 203 }; 204 205 static_assert(sizeof(struct rgp_sqtt_marker_cb_end) == 12, 206 "rgp_sqtt_marker_cb_end doesn't match RGP spec"); 207 208 /** 209 * API types used in RGP SQ thread-tracing markers for the "General API" 210 * packet. 211 */ 212 enum rgp_sqtt_marker_general_api_type 213 { 214 ApiCmdBindPipeline = 0, 215 ApiCmdBindDescriptorSets = 1, 216 ApiCmdBindIndexBuffer = 2, 217 ApiCmdBindVertexBuffers = 3, 218 ApiCmdDraw = 4, 219 ApiCmdDrawIndexed = 5, 220 ApiCmdDrawIndirect = 6, 221 ApiCmdDrawIndexedIndirect = 7, 222 ApiCmdDrawIndirectCountAMD = 8, 223 ApiCmdDrawIndexedIndirectCountAMD = 9, 224 ApiCmdDispatch = 10, 225 ApiCmdDispatchIndirect = 11, 226 ApiCmdCopyBuffer = 12, 227 ApiCmdCopyImage = 13, 228 ApiCmdBlitImage = 14, 229 ApiCmdCopyBufferToImage = 15, 230 ApiCmdCopyImageToBuffer = 16, 231 ApiCmdUpdateBuffer = 17, 232 ApiCmdFillBuffer = 18, 233 ApiCmdClearColorImage = 19, 234 ApiCmdClearDepthStencilImage = 20, 235 ApiCmdClearAttachments = 21, 236 ApiCmdResolveImage = 22, 237 ApiCmdWaitEvents = 23, 238 ApiCmdPipelineBarrier = 24, 239 ApiCmdBeginQuery = 25, 240 ApiCmdEndQuery = 26, 241 ApiCmdResetQueryPool = 27, 242 ApiCmdWriteTimestamp = 28, 243 ApiCmdCopyQueryPoolResults = 29, 244 ApiCmdPushConstants = 30, 245 ApiCmdBeginRenderPass = 31, 246 ApiCmdNextSubpass = 32, 247 ApiCmdEndRenderPass = 33, 248 ApiCmdExecuteCommands = 34, 249 ApiCmdSetViewport = 35, 250 ApiCmdSetScissor = 36, 251 ApiCmdSetLineWidth = 37, 252 ApiCmdSetDepthBias = 38, 253 ApiCmdSetBlendConstants = 39, 254 ApiCmdSetDepthBounds = 40, 255 ApiCmdSetStencilCompareMask = 41, 256 ApiCmdSetStencilWriteMask = 42, 257 ApiCmdSetStencilReference = 43, 258 ApiCmdDrawIndirectCount = 44, 259 ApiCmdDrawIndexedIndirectCount = 45, 260 /* gap */ 261 ApiCmdDrawMeshTasksEXT = 47, 262 ApiCmdDrawMeshTasksIndirectCountEXT = 48, 263 ApiCmdDrawMeshTasksIndirectEXT = 49, 264 265 ApiRayTracingSeparateCompiled = 0x800000, 266 ApiInvalid = 0xffffffff 267 }; 268 269 /** 270 * RGP SQ thread-tracing marker for a "General API" instrumentation packet. 271 */ 272 struct rgp_sqtt_marker_general_api { 273 union { 274 struct { 275 uint32_t identifier : 4; 276 uint32_t ext_dwords : 3; 277 uint32_t api_type : 20; 278 uint32_t is_end : 1; 279 uint32_t reserved : 4; 280 }; 281 uint32_t dword01; 282 }; 283 }; 284 285 static_assert(sizeof(struct rgp_sqtt_marker_general_api) == 4, 286 "rgp_sqtt_marker_general_api doesn't match RGP spec"); 287 288 /** 289 * API types used in RGP SQ thread-tracing markers (Table 16). 290 */ 291 enum rgp_sqtt_marker_event_type 292 { 293 EventCmdDraw = 0, 294 EventCmdDrawIndexed = 1, 295 EventCmdDrawIndirect = 2, 296 EventCmdDrawIndexedIndirect = 3, 297 EventCmdDrawIndirectCountAMD = 4, 298 EventCmdDrawIndexedIndirectCountAMD = 5, 299 EventCmdDispatch = 6, 300 EventCmdDispatchIndirect = 7, 301 EventCmdCopyBuffer = 8, 302 EventCmdCopyImage = 9, 303 EventCmdBlitImage = 10, 304 EventCmdCopyBufferToImage = 11, 305 EventCmdCopyImageToBuffer = 12, 306 EventCmdUpdateBuffer = 13, 307 EventCmdFillBuffer = 14, 308 EventCmdClearColorImage = 15, 309 EventCmdClearDepthStencilImage = 16, 310 EventCmdClearAttachments = 17, 311 EventCmdResolveImage = 18, 312 EventCmdWaitEvents = 19, 313 EventCmdPipelineBarrier = 20, 314 EventCmdResetQueryPool = 21, 315 EventCmdCopyQueryPoolResults = 22, 316 EventRenderPassColorClear = 23, 317 EventRenderPassDepthStencilClear = 24, 318 EventRenderPassResolve = 25, 319 EventInternalUnknown = 26, 320 EventCmdDrawIndirectCount = 27, 321 EventCmdDrawIndexedIndirectCount = 28, 322 /* gap */ 323 EventCmdTraceRaysKHR = 30, 324 EventCmdTraceRaysIndirectKHR = 31, 325 EventCmdBuildAccelerationStructuresKHR = 32, 326 EventCmdBuildAccelerationStructuresIndirectKHR = 33, 327 EventCmdCopyAccelerationStructureKHR = 34, 328 EventCmdCopyAccelerationStructureToMemoryKHR = 35, 329 EventCmdCopyMemoryToAccelerationStructureKHR = 36, 330 /* gap */ 331 EventCmdDrawMeshTasksEXT = 41, 332 EventCmdDrawMeshTasksIndirectCountEXT = 42, 333 EventCmdDrawMeshTasksIndirectEXT = 43, 334 EventUnknown = 0x7fff, 335 EventInvalid = 0xffffffff 336 }; 337 338 /** 339 * "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker. (Table 4) 340 */ 341 struct rgp_sqtt_marker_event { 342 union { 343 struct { 344 uint32_t identifier : 4; 345 uint32_t ext_dwords : 3; 346 uint32_t api_type : 24; 347 uint32_t has_thread_dims : 1; 348 }; 349 uint32_t dword01; 350 }; 351 union { 352 struct { 353 uint32_t cb_id : 20; 354 uint32_t vertex_offset_reg_idx : 4; 355 uint32_t instance_offset_reg_idx : 4; 356 uint32_t draw_index_reg_idx : 4; 357 }; 358 uint32_t dword02; 359 }; 360 union { 361 uint32_t cmd_id; 362 uint32_t dword03; 363 }; 364 }; 365 366 static_assert(sizeof(struct rgp_sqtt_marker_event) == 12, 367 "rgp_sqtt_marker_event doesn't match RGP spec"); 368 369 /** 370 * Per-dispatch specific marker where workgroup dims are included. 371 */ 372 struct rgp_sqtt_marker_event_with_dims { 373 struct rgp_sqtt_marker_event event; 374 uint32_t thread_x; 375 uint32_t thread_y; 376 uint32_t thread_z; 377 }; 378 379 static_assert(sizeof(struct rgp_sqtt_marker_event_with_dims) == 24, 380 "rgp_sqtt_marker_event_with_dims doesn't match RGP spec"); 381 382 /** 383 * "Barrier Start" RGP SQTT instrumentation marker (Table 5) 384 */ 385 struct rgp_sqtt_marker_barrier_start { 386 union { 387 struct { 388 uint32_t identifier : 4; 389 uint32_t ext_dwords : 3; 390 uint32_t cb_id : 20; 391 uint32_t reserved : 5; 392 }; 393 uint32_t dword01; 394 }; 395 union { 396 struct { 397 uint32_t driver_reason : 31; 398 uint32_t internal : 1; 399 }; 400 uint32_t dword02; 401 }; 402 }; 403 404 static_assert(sizeof(struct rgp_sqtt_marker_barrier_start) == 8, 405 "rgp_sqtt_marker_barrier_start doesn't match RGP spec"); 406 407 /** 408 * "Barrier End" RGP SQTT instrumentation marker (Table 6) 409 */ 410 struct rgp_sqtt_marker_barrier_end { 411 union { 412 struct { 413 uint32_t identifier : 4; 414 uint32_t ext_dwords : 3; 415 uint32_t cb_id : 20; 416 uint32_t wait_on_eop_ts : 1; 417 uint32_t vs_partial_flush : 1; 418 uint32_t ps_partial_flush : 1; 419 uint32_t cs_partial_flush : 1; 420 uint32_t pfp_sync_me : 1; 421 }; 422 uint32_t dword01; 423 }; 424 union { 425 struct { 426 uint32_t sync_cp_dma : 1; 427 uint32_t inval_tcp : 1; 428 uint32_t inval_sqI : 1; 429 uint32_t inval_sqK : 1; 430 uint32_t flush_tcc : 1; 431 uint32_t inval_tcc : 1; 432 uint32_t flush_cb : 1; 433 uint32_t inval_cb : 1; 434 uint32_t flush_db : 1; 435 uint32_t inval_db : 1; 436 uint32_t num_layout_transitions : 16; 437 uint32_t inval_gl1 : 1; 438 uint32_t wait_on_ts : 1; 439 uint32_t eop_ts_bottom_of_pipe : 1; 440 uint32_t eos_ts_ps_done : 1; 441 uint32_t eos_ts_cs_done : 1; 442 uint32_t reserved : 1; 443 }; 444 uint32_t dword02; 445 }; 446 }; 447 448 static_assert(sizeof(struct rgp_sqtt_marker_barrier_end) == 8, 449 "rgp_sqtt_marker_barrier_end doesn't match RGP spec"); 450 451 /** 452 * "Layout Transition" RGP SQTT instrumentation marker (Table 7) 453 */ 454 struct rgp_sqtt_marker_layout_transition { 455 union { 456 struct { 457 uint32_t identifier : 4; 458 uint32_t ext_dwords : 3; 459 uint32_t depth_stencil_expand : 1; 460 uint32_t htile_hiz_range_expand : 1; 461 uint32_t depth_stencil_resummarize : 1; 462 uint32_t dcc_decompress : 1; 463 uint32_t fmask_decompress : 1; 464 uint32_t fast_clear_eliminate : 1; 465 uint32_t fmask_color_expand : 1; 466 uint32_t init_mask_ram : 1; 467 uint32_t reserved1 : 17; 468 }; 469 uint32_t dword01; 470 }; 471 union { 472 struct { 473 uint32_t reserved2 : 32; 474 }; 475 uint32_t dword02; 476 }; 477 }; 478 479 static_assert(sizeof(struct rgp_sqtt_marker_layout_transition) == 8, 480 "rgp_sqtt_marker_layout_transition doesn't match RGP spec"); 481 482 483 /** 484 * "User Event" RGP SQTT instrumentation marker (Table 8) 485 */ 486 struct rgp_sqtt_marker_user_event { 487 union { 488 struct { 489 uint32_t identifier : 4; 490 uint32_t reserved0 : 8; 491 uint32_t data_type : 8; 492 uint32_t reserved1 : 12; 493 }; 494 uint32_t dword01; 495 }; 496 }; 497 struct rgp_sqtt_marker_user_event_with_length { 498 struct rgp_sqtt_marker_user_event user_event; 499 uint32_t length; 500 }; 501 502 static_assert(sizeof(struct rgp_sqtt_marker_user_event) == 4, 503 "rgp_sqtt_marker_user_event doesn't match RGP spec"); 504 505 enum rgp_sqtt_marker_user_event_type 506 { 507 UserEventTrigger = 0, 508 UserEventPop, 509 UserEventPush, 510 UserEventObjectName, 511 }; 512 513 /** 514 * "Pipeline bind" RGP SQTT instrumentation marker (Table 12) 515 */ 516 struct rgp_sqtt_marker_pipeline_bind { 517 union { 518 struct { 519 uint32_t identifier : 4; 520 uint32_t ext_dwords : 3; 521 uint32_t bind_point : 1; 522 uint32_t cb_id : 20; 523 uint32_t reserved : 4; 524 }; 525 uint32_t dword01; 526 }; 527 union { 528 uint32_t api_pso_hash[2]; 529 struct { 530 uint32_t dword02; 531 uint32_t dword03; 532 }; 533 }; 534 }; 535 536 static_assert(sizeof(struct rgp_sqtt_marker_pipeline_bind) == 12, 537 "rgp_sqtt_marker_pipeline_bind doesn't match RGP spec"); 538 539 bool ac_sqtt_add_pso_correlation(struct ac_sqtt *sqtt, uint64_t pipeline_hash, uint64_t api_hash); 540 541 bool ac_sqtt_add_code_object_loader_event(struct ac_sqtt *sqtt, uint64_t pipeline_hash, 542 uint64_t base_address); 543 544 bool ac_sqtt_add_clock_calibration(struct ac_sqtt *sqtt, uint64_t cpu_timestamp, 545 uint64_t gpu_timestamp); 546 547 bool ac_check_profile_state(const struct radeon_info *info); 548 549 union rgp_sqtt_marker_cb_id ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt *sqtt, 550 enum amd_ip_type ip_type); 551 552 bool ac_sqtt_se_is_disabled(const struct radeon_info *info, unsigned se); 553 554 bool ac_sqtt_get_trace(struct ac_sqtt *sqtt, const struct radeon_info *info, 555 struct ac_sqtt_trace *sqtt_trace); 556 557 uint32_t ac_sqtt_get_shader_mask(const struct radeon_info *info); 558 559 uint32_t ac_sqtt_get_active_cu(const struct radeon_info *info, unsigned se); 560 561 #endif 562