1 /* 2 * Copyright 2020 Advanced Micro Devices, Inc. 3 * Copyright 2020 Valve Corporation 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * on the rights to use, copy, modify, merge, publish, distribute, sub 10 * license, and/or sell copies of the Software, and to permit persons to whom 11 * the Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23 * USE OR OTHER DEALINGS IN THE SOFTWARE. 24 */ 25 26 #ifndef AC_SQTT_H 27 #define AC_SQTT_H 28 29 #include <stdint.h> 30 #include <stdbool.h> 31 32 #include <assert.h> 33 #include "ac_rgp.h" 34 35 struct radeon_cmdbuf; 36 struct radeon_info; 37 38 struct ac_thread_trace_data { 39 struct radeon_cmdbuf *start_cs[2]; 40 struct radeon_cmdbuf *stop_cs[2]; 41 /* struct radeon_winsys_bo or struct pb_buffer */ 42 void *bo; 43 void *ptr; 44 uint32_t buffer_size; 45 int start_frame; 46 char *trigger_file; 47 48 struct rgp_code_object rgp_code_object; 49 struct rgp_loader_events rgp_loader_events; 50 struct rgp_pso_correlation rgp_pso_correlation; 51 52 struct rgp_queue_info rgp_queue_info; 53 struct rgp_queue_event rgp_queue_event; 54 55 struct rgp_clock_calibration rgp_clock_calibration; 56 }; 57 58 #define SQTT_BUFFER_ALIGN_SHIFT 12 59 60 struct ac_thread_trace_info { 61 uint32_t cur_offset; 62 uint32_t trace_status; 63 union { 64 uint32_t gfx9_write_counter; 65 uint32_t gfx10_dropped_cntr; 66 }; 67 }; 68 69 struct ac_thread_trace_se { 70 struct ac_thread_trace_info info; 71 void *data_ptr; 72 uint32_t shader_engine; 73 uint32_t compute_unit; 74 }; 75 76 struct ac_thread_trace { 77 struct ac_thread_trace_data *data; 78 uint32_t num_traces; 79 struct ac_thread_trace_se traces[4]; 80 }; 81 82 uint64_t 83 ac_thread_trace_get_info_offset(unsigned se); 84 85 uint64_t 86 ac_thread_trace_get_data_offset(const struct radeon_info *rad_info, 87 const struct ac_thread_trace_data *data, unsigned se); 88 uint64_t 89 ac_thread_trace_get_info_va(uint64_t va, unsigned se); 90 91 uint64_t 92 ac_thread_trace_get_data_va(const struct radeon_info *rad_info, 93 const struct ac_thread_trace_data *data, uint64_t va, unsigned se); 94 95 bool 96 ac_is_thread_trace_complete(struct radeon_info *rad_info, 97 const struct ac_thread_trace_data *data, 98 const struct ac_thread_trace_info *info); 99 100 uint32_t 101 ac_get_expected_buffer_size(struct radeon_info *rad_info, 102 const struct ac_thread_trace_info *info); 103 104 /** 105 * Identifiers for RGP SQ thread-tracing markers (Table 1) 106 */ 107 enum rgp_sqtt_marker_identifier 108 { 109 RGP_SQTT_MARKER_IDENTIFIER_EVENT = 0x0, 110 RGP_SQTT_MARKER_IDENTIFIER_CB_START = 0x1, 111 RGP_SQTT_MARKER_IDENTIFIER_CB_END = 0x2, 112 RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START = 0x3, 113 RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END = 0x4, 114 RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT = 0x5, 115 RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API = 0x6, 116 RGP_SQTT_MARKER_IDENTIFIER_SYNC = 0x7, 117 RGP_SQTT_MARKER_IDENTIFIER_PRESENT = 0x8, 118 RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION = 0x9, 119 RGP_SQTT_MARKER_IDENTIFIER_RENDER_PASS = 0xA, 120 RGP_SQTT_MARKER_IDENTIFIER_RESERVED2 = 0xB, 121 RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE = 0xC, 122 RGP_SQTT_MARKER_IDENTIFIER_RESERVED4 = 0xD, 123 RGP_SQTT_MARKER_IDENTIFIER_RESERVED5 = 0xE, 124 RGP_SQTT_MARKER_IDENTIFIER_RESERVED6 = 0xF 125 }; 126 127 /** 128 * RGP SQ thread-tracing marker for the start of a command buffer. (Table 2) 129 */ 130 struct rgp_sqtt_marker_cb_start { 131 union { 132 struct { 133 uint32_t identifier : 4; 134 uint32_t ext_dwords : 3; 135 uint32_t cb_id : 20; 136 uint32_t queue : 5; 137 }; 138 uint32_t dword01; 139 }; 140 union { 141 uint32_t device_id_low; 142 uint32_t dword02; 143 }; 144 union { 145 uint32_t device_id_high; 146 uint32_t dword03; 147 }; 148 union { 149 uint32_t queue_flags; 150 uint32_t dword04; 151 }; 152 }; 153 154 static_assert(sizeof(struct rgp_sqtt_marker_cb_start) == 16, 155 "rgp_sqtt_marker_cb_start doesn't match RGP spec"); 156 157 /** 158 * 159 * RGP SQ thread-tracing marker for the end of a command buffer. (Table 3) 160 */ 161 struct rgp_sqtt_marker_cb_end { 162 union { 163 struct { 164 uint32_t identifier : 4; 165 uint32_t ext_dwords : 3; 166 uint32_t cb_id : 20; 167 uint32_t reserved : 5; 168 }; 169 uint32_t dword01; 170 }; 171 union { 172 uint32_t device_id_low; 173 uint32_t dword02; 174 }; 175 union { 176 uint32_t device_id_high; 177 uint32_t dword03; 178 }; 179 }; 180 181 static_assert(sizeof(struct rgp_sqtt_marker_cb_end) == 12, 182 "rgp_sqtt_marker_cb_end doesn't match RGP spec"); 183 184 /** 185 * API types used in RGP SQ thread-tracing markers for the "General API" 186 * packet. 187 */ 188 enum rgp_sqtt_marker_general_api_type 189 { 190 ApiCmdBindPipeline = 0, 191 ApiCmdBindDescriptorSets = 1, 192 ApiCmdBindIndexBuffer = 2, 193 ApiCmdBindVertexBuffers = 3, 194 ApiCmdDraw = 4, 195 ApiCmdDrawIndexed = 5, 196 ApiCmdDrawIndirect = 6, 197 ApiCmdDrawIndexedIndirect = 7, 198 ApiCmdDrawIndirectCountAMD = 8, 199 ApiCmdDrawIndexedIndirectCountAMD = 9, 200 ApiCmdDispatch = 10, 201 ApiCmdDispatchIndirect = 11, 202 ApiCmdCopyBuffer = 12, 203 ApiCmdCopyImage = 13, 204 ApiCmdBlitImage = 14, 205 ApiCmdCopyBufferToImage = 15, 206 ApiCmdCopyImageToBuffer = 16, 207 ApiCmdUpdateBuffer = 17, 208 ApiCmdFillBuffer = 18, 209 ApiCmdClearColorImage = 19, 210 ApiCmdClearDepthStencilImage = 20, 211 ApiCmdClearAttachments = 21, 212 ApiCmdResolveImage = 22, 213 ApiCmdWaitEvents = 23, 214 ApiCmdPipelineBarrier = 24, 215 ApiCmdBeginQuery = 25, 216 ApiCmdEndQuery = 26, 217 ApiCmdResetQueryPool = 27, 218 ApiCmdWriteTimestamp = 28, 219 ApiCmdCopyQueryPoolResults = 29, 220 ApiCmdPushConstants = 30, 221 ApiCmdBeginRenderPass = 31, 222 ApiCmdNextSubpass = 32, 223 ApiCmdEndRenderPass = 33, 224 ApiCmdExecuteCommands = 34, 225 ApiCmdSetViewport = 35, 226 ApiCmdSetScissor = 36, 227 ApiCmdSetLineWidth = 37, 228 ApiCmdSetDepthBias = 38, 229 ApiCmdSetBlendConstants = 39, 230 ApiCmdSetDepthBounds = 40, 231 ApiCmdSetStencilCompareMask = 41, 232 ApiCmdSetStencilWriteMask = 42, 233 ApiCmdSetStencilReference = 43, 234 ApiCmdDrawIndirectCount = 44, 235 ApiCmdDrawIndexedIndirectCount = 45, 236 ApiInvalid = 0xffffffff 237 }; 238 239 /** 240 * RGP SQ thread-tracing marker for a "General API" instrumentation packet. 241 */ 242 struct rgp_sqtt_marker_general_api { 243 union { 244 struct { 245 uint32_t identifier : 4; 246 uint32_t ext_dwords : 3; 247 uint32_t api_type : 20; 248 uint32_t is_end : 1; 249 uint32_t reserved : 4; 250 }; 251 uint32_t dword01; 252 }; 253 }; 254 255 static_assert(sizeof(struct rgp_sqtt_marker_general_api) == 4, 256 "rgp_sqtt_marker_general_api doesn't match RGP spec"); 257 258 /** 259 * API types used in RGP SQ thread-tracing markers (Table 16). 260 */ 261 enum rgp_sqtt_marker_event_type 262 { 263 EventCmdDraw = 0, 264 EventCmdDrawIndexed = 1, 265 EventCmdDrawIndirect = 2, 266 EventCmdDrawIndexedIndirect = 3, 267 EventCmdDrawIndirectCountAMD = 4, 268 EventCmdDrawIndexedIndirectCountAMD = 5, 269 EventCmdDispatch = 6, 270 EventCmdDispatchIndirect = 7, 271 EventCmdCopyBuffer = 8, 272 EventCmdCopyImage = 9, 273 EventCmdBlitImage = 10, 274 EventCmdCopyBufferToImage = 11, 275 EventCmdCopyImageToBuffer = 12, 276 EventCmdUpdateBuffer = 13, 277 EventCmdFillBuffer = 14, 278 EventCmdClearColorImage = 15, 279 EventCmdClearDepthStencilImage = 16, 280 EventCmdClearAttachments = 17, 281 EventCmdResolveImage = 18, 282 EventCmdWaitEvents = 19, 283 EventCmdPipelineBarrier = 20, 284 EventCmdResetQueryPool = 21, 285 EventCmdCopyQueryPoolResults = 22, 286 EventRenderPassColorClear = 23, 287 EventRenderPassDepthStencilClear = 24, 288 EventRenderPassResolve = 25, 289 EventInternalUnknown = 26, 290 EventCmdDrawIndirectCount = 27, 291 EventCmdDrawIndexedIndirectCount = 28, 292 EventInvalid = 0xffffffff 293 }; 294 295 /** 296 * "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker. (Table 4) 297 */ 298 struct rgp_sqtt_marker_event { 299 union { 300 struct { 301 uint32_t identifier : 4; 302 uint32_t ext_dwords : 3; 303 uint32_t api_type : 24; 304 uint32_t has_thread_dims : 1; 305 }; 306 uint32_t dword01; 307 }; 308 union { 309 struct { 310 uint32_t cb_id : 20; 311 uint32_t vertex_offset_reg_idx : 4; 312 uint32_t instance_offset_reg_idx : 4; 313 uint32_t draw_index_reg_idx : 4; 314 }; 315 uint32_t dword02; 316 }; 317 union { 318 uint32_t cmd_id; 319 uint32_t dword03; 320 }; 321 }; 322 323 static_assert(sizeof(struct rgp_sqtt_marker_event) == 12, 324 "rgp_sqtt_marker_event doesn't match RGP spec"); 325 326 /** 327 * Per-dispatch specific marker where workgroup dims are included. 328 */ 329 struct rgp_sqtt_marker_event_with_dims { 330 struct rgp_sqtt_marker_event event; 331 uint32_t thread_x; 332 uint32_t thread_y; 333 uint32_t thread_z; 334 }; 335 336 static_assert(sizeof(struct rgp_sqtt_marker_event_with_dims) == 24, 337 "rgp_sqtt_marker_event_with_dims doesn't match RGP spec"); 338 339 /** 340 * "Barrier Start" RGP SQTT instrumentation marker (Table 5) 341 */ 342 struct rgp_sqtt_marker_barrier_start { 343 union { 344 struct { 345 uint32_t identifier : 4; 346 uint32_t ext_dwords : 3; 347 uint32_t cb_id : 20; 348 uint32_t reserved : 5; 349 }; 350 uint32_t dword01; 351 }; 352 union { 353 struct { 354 uint32_t driver_reason : 31; 355 uint32_t internal : 1; 356 }; 357 uint32_t dword02; 358 }; 359 }; 360 361 static_assert(sizeof(struct rgp_sqtt_marker_barrier_start) == 8, 362 "rgp_sqtt_marker_barrier_start doesn't match RGP spec"); 363 364 /** 365 * "Barrier End" RGP SQTT instrumentation marker (Table 6) 366 */ 367 struct rgp_sqtt_marker_barrier_end { 368 union { 369 struct { 370 uint32_t identifier : 4; 371 uint32_t ext_dwords : 3; 372 uint32_t cb_id : 20; 373 uint32_t wait_on_eop_ts : 1; 374 uint32_t vs_partial_flush : 1; 375 uint32_t ps_partial_flush : 1; 376 uint32_t cs_partial_flush : 1; 377 uint32_t pfp_sync_me : 1; 378 }; 379 uint32_t dword01; 380 }; 381 union { 382 struct { 383 uint32_t sync_cp_dma : 1; 384 uint32_t inval_tcp : 1; 385 uint32_t inval_sqI : 1; 386 uint32_t inval_sqK : 1; 387 uint32_t flush_tcc : 1; 388 uint32_t inval_tcc : 1; 389 uint32_t flush_cb : 1; 390 uint32_t inval_cb : 1; 391 uint32_t flush_db : 1; 392 uint32_t inval_db : 1; 393 uint32_t num_layout_transitions : 16; 394 uint32_t inval_gl1 : 1; 395 uint32_t reserved : 5; 396 }; 397 uint32_t dword02; 398 }; 399 }; 400 401 static_assert(sizeof(struct rgp_sqtt_marker_barrier_end) == 8, 402 "rgp_sqtt_marker_barrier_end doesn't match RGP spec"); 403 404 /** 405 * "Layout Transition" RGP SQTT instrumentation marker (Table 7) 406 */ 407 struct rgp_sqtt_marker_layout_transition { 408 union { 409 struct { 410 uint32_t identifier : 4; 411 uint32_t ext_dwords : 3; 412 uint32_t depth_stencil_expand : 1; 413 uint32_t htile_hiz_range_expand : 1; 414 uint32_t depth_stencil_resummarize : 1; 415 uint32_t dcc_decompress : 1; 416 uint32_t fmask_decompress : 1; 417 uint32_t fast_clear_eliminate : 1; 418 uint32_t fmask_color_expand : 1; 419 uint32_t init_mask_ram : 1; 420 uint32_t reserved1 : 17; 421 }; 422 uint32_t dword01; 423 }; 424 union { 425 struct { 426 uint32_t reserved2 : 32; 427 }; 428 uint32_t dword02; 429 }; 430 }; 431 432 static_assert(sizeof(struct rgp_sqtt_marker_layout_transition) == 8, 433 "rgp_sqtt_marker_layout_transition doesn't match RGP spec"); 434 435 436 /** 437 * "User Event" RGP SQTT instrumentation marker (Table 8) 438 */ 439 struct rgp_sqtt_marker_user_event { 440 union { 441 struct { 442 uint32_t identifier : 4; 443 uint32_t reserved0 : 8; 444 uint32_t data_type : 8; 445 uint32_t reserved1 : 12; 446 }; 447 uint32_t dword01; 448 }; 449 }; 450 struct rgp_sqtt_marker_user_event_with_length { 451 struct rgp_sqtt_marker_user_event user_event; 452 uint32_t length; 453 }; 454 455 static_assert(sizeof(struct rgp_sqtt_marker_user_event) == 4, 456 "rgp_sqtt_marker_user_event doesn't match RGP spec"); 457 458 enum rgp_sqtt_marker_user_event_type 459 { 460 UserEventTrigger = 0, 461 UserEventPop, 462 UserEventPush, 463 UserEventObjectName, 464 }; 465 466 /** 467 * "Pipeline bind" RGP SQTT instrumentation marker (Table 12) 468 */ 469 struct rgp_sqtt_marker_pipeline_bind { 470 union { 471 struct { 472 uint32_t identifier : 4; 473 uint32_t ext_dwords : 3; 474 uint32_t bind_point : 1; 475 uint32_t cb_id : 20; 476 uint32_t reserved : 4; 477 }; 478 uint32_t dword01; 479 }; 480 union { 481 uint32_t api_pso_hash[2]; 482 struct { 483 uint32_t dword02; 484 uint32_t dword03; 485 }; 486 }; 487 }; 488 489 static_assert(sizeof(struct rgp_sqtt_marker_pipeline_bind) == 12, 490 "rgp_sqtt_marker_pipeline_bind doesn't match RGP spec"); 491 492 493 bool ac_sqtt_add_pso_correlation(struct ac_thread_trace_data *thread_trace_data, 494 uint64_t pipeline_hash); 495 496 bool ac_sqtt_add_code_object_loader_event(struct ac_thread_trace_data *thread_trace_data, 497 uint64_t pipeline_hash, 498 uint64_t base_address); 499 500 bool ac_check_profile_state(const struct radeon_info *info); 501 502 #endif 503