1 /* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24 #ifndef R600_QUERY_H 25 #define R600_QUERY_H 26 27 #include "util/u_threaded_context.h" 28 29 struct pipe_context; 30 struct pipe_query; 31 struct pipe_resource; 32 33 struct r600_common_context; 34 struct si_screen; 35 struct r600_query; 36 struct r600_query_hw; 37 struct r600_resource; 38 39 enum { 40 R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC, 41 R600_QUERY_DECOMPRESS_CALLS, 42 R600_QUERY_MRT_DRAW_CALLS, 43 R600_QUERY_PRIM_RESTART_CALLS, 44 R600_QUERY_SPILL_DRAW_CALLS, 45 R600_QUERY_COMPUTE_CALLS, 46 R600_QUERY_SPILL_COMPUTE_CALLS, 47 R600_QUERY_DMA_CALLS, 48 R600_QUERY_CP_DMA_CALLS, 49 R600_QUERY_NUM_VS_FLUSHES, 50 R600_QUERY_NUM_PS_FLUSHES, 51 R600_QUERY_NUM_CS_FLUSHES, 52 R600_QUERY_NUM_CB_CACHE_FLUSHES, 53 R600_QUERY_NUM_DB_CACHE_FLUSHES, 54 R600_QUERY_NUM_L2_INVALIDATES, 55 R600_QUERY_NUM_L2_WRITEBACKS, 56 R600_QUERY_NUM_RESIDENT_HANDLES, 57 R600_QUERY_TC_OFFLOADED_SLOTS, 58 R600_QUERY_TC_DIRECT_SLOTS, 59 R600_QUERY_TC_NUM_SYNCS, 60 R600_QUERY_CS_THREAD_BUSY, 61 R600_QUERY_GALLIUM_THREAD_BUSY, 62 R600_QUERY_REQUESTED_VRAM, 63 R600_QUERY_REQUESTED_GTT, 64 R600_QUERY_MAPPED_VRAM, 65 R600_QUERY_MAPPED_GTT, 66 R600_QUERY_BUFFER_WAIT_TIME, 67 R600_QUERY_NUM_MAPPED_BUFFERS, 68 R600_QUERY_NUM_GFX_IBS, 69 R600_QUERY_NUM_SDMA_IBS, 70 R600_QUERY_GFX_BO_LIST_SIZE, 71 R600_QUERY_GFX_IB_SIZE, 72 R600_QUERY_NUM_BYTES_MOVED, 73 R600_QUERY_NUM_EVICTIONS, 74 R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS, 75 R600_QUERY_VRAM_USAGE, 76 R600_QUERY_VRAM_VIS_USAGE, 77 R600_QUERY_GTT_USAGE, 78 R600_QUERY_GPU_TEMPERATURE, 79 R600_QUERY_CURRENT_GPU_SCLK, 80 R600_QUERY_CURRENT_GPU_MCLK, 81 R600_QUERY_GPU_LOAD, 82 R600_QUERY_GPU_SHADERS_BUSY, 83 R600_QUERY_GPU_TA_BUSY, 84 R600_QUERY_GPU_GDS_BUSY, 85 R600_QUERY_GPU_VGT_BUSY, 86 R600_QUERY_GPU_IA_BUSY, 87 R600_QUERY_GPU_SX_BUSY, 88 R600_QUERY_GPU_WD_BUSY, 89 R600_QUERY_GPU_BCI_BUSY, 90 R600_QUERY_GPU_SC_BUSY, 91 R600_QUERY_GPU_PA_BUSY, 92 R600_QUERY_GPU_DB_BUSY, 93 R600_QUERY_GPU_CP_BUSY, 94 R600_QUERY_GPU_CB_BUSY, 95 R600_QUERY_GPU_SDMA_BUSY, 96 R600_QUERY_GPU_PFP_BUSY, 97 R600_QUERY_GPU_MEQ_BUSY, 98 R600_QUERY_GPU_ME_BUSY, 99 R600_QUERY_GPU_SURF_SYNC_BUSY, 100 R600_QUERY_GPU_CP_DMA_BUSY, 101 R600_QUERY_GPU_SCRATCH_RAM_BUSY, 102 R600_QUERY_NUM_COMPILATIONS, 103 R600_QUERY_NUM_SHADERS_CREATED, 104 R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO, 105 R600_QUERY_NUM_SHADER_CACHE_HITS, 106 R600_QUERY_GPIN_ASIC_ID, 107 R600_QUERY_GPIN_NUM_SIMD, 108 R600_QUERY_GPIN_NUM_RB, 109 R600_QUERY_GPIN_NUM_SPI, 110 R600_QUERY_GPIN_NUM_SE, 111 112 R600_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100, 113 }; 114 115 enum { 116 R600_QUERY_GROUP_GPIN = 0, 117 R600_NUM_SW_QUERY_GROUPS 118 }; 119 120 struct r600_query_ops { 121 void (*destroy)(struct si_screen *, struct r600_query *); 122 bool (*begin)(struct r600_common_context *, struct r600_query *); 123 bool (*end)(struct r600_common_context *, struct r600_query *); 124 bool (*get_result)(struct r600_common_context *, 125 struct r600_query *, bool wait, 126 union pipe_query_result *result); 127 void (*get_result_resource)(struct r600_common_context *, 128 struct r600_query *, bool wait, 129 enum pipe_query_value_type result_type, 130 int index, 131 struct pipe_resource *resource, 132 unsigned offset); 133 }; 134 135 struct r600_query { 136 struct threaded_query b; 137 struct r600_query_ops *ops; 138 139 /* The type of query */ 140 unsigned type; 141 }; 142 143 enum { 144 R600_QUERY_HW_FLAG_NO_START = (1 << 0), 145 /* gap */ 146 /* whether begin_query doesn't clear the result */ 147 R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2), 148 }; 149 150 struct r600_query_hw_ops { 151 bool (*prepare_buffer)(struct si_screen *, 152 struct r600_query_hw *, 153 struct r600_resource *); 154 void (*emit_start)(struct r600_common_context *, 155 struct r600_query_hw *, 156 struct r600_resource *buffer, uint64_t va); 157 void (*emit_stop)(struct r600_common_context *, 158 struct r600_query_hw *, 159 struct r600_resource *buffer, uint64_t va); 160 void (*clear_result)(struct r600_query_hw *, union pipe_query_result *); 161 void (*add_result)(struct si_screen *screen, 162 struct r600_query_hw *, void *buffer, 163 union pipe_query_result *result); 164 }; 165 166 struct r600_query_buffer { 167 /* The buffer where query results are stored. */ 168 struct r600_resource *buf; 169 /* Offset of the next free result after current query data */ 170 unsigned results_end; 171 /* If a query buffer is full, a new buffer is created and the old one 172 * is put in here. When we calculate the result, we sum up the samples 173 * from all buffers. */ 174 struct r600_query_buffer *previous; 175 }; 176 177 struct r600_query_hw { 178 struct r600_query b; 179 struct r600_query_hw_ops *ops; 180 unsigned flags; 181 182 /* The query buffer and how many results are in it. */ 183 struct r600_query_buffer buffer; 184 /* Size of the result in memory for both begin_query and end_query, 185 * this can be one or two numbers, or it could even be a size of a structure. */ 186 unsigned result_size; 187 /* The number of dwords for begin_query or end_query. */ 188 unsigned num_cs_dw_begin; 189 unsigned num_cs_dw_end; 190 /* Linked list of queries */ 191 struct list_head list; 192 /* For transform feedback: which stream the query is for */ 193 unsigned stream; 194 195 /* Workaround via compute shader */ 196 struct r600_resource *workaround_buf; 197 unsigned workaround_offset; 198 }; 199 200 bool si_query_hw_init(struct si_screen *sscreen, 201 struct r600_query_hw *query); 202 void si_query_hw_destroy(struct si_screen *sscreen, 203 struct r600_query *rquery); 204 bool si_query_hw_begin(struct r600_common_context *rctx, 205 struct r600_query *rquery); 206 bool si_query_hw_end(struct r600_common_context *rctx, 207 struct r600_query *rquery); 208 bool si_query_hw_get_result(struct r600_common_context *rctx, 209 struct r600_query *rquery, 210 bool wait, 211 union pipe_query_result *result); 212 213 /* Performance counters */ 214 enum { 215 /* This block is part of the shader engine */ 216 R600_PC_BLOCK_SE = (1 << 0), 217 218 /* Expose per-instance groups instead of summing all instances (within 219 * an SE). */ 220 R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1), 221 222 /* Expose per-SE groups instead of summing instances across SEs. */ 223 R600_PC_BLOCK_SE_GROUPS = (1 << 2), 224 225 /* Shader block */ 226 R600_PC_BLOCK_SHADER = (1 << 3), 227 228 /* Non-shader block with perfcounters windowed by shaders. */ 229 R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4), 230 }; 231 232 /* Describes a hardware block with performance counters. Multiple instances of 233 * each block, possibly per-SE, may exist on the chip. Depending on the block 234 * and on the user's configuration, we either 235 * (a) expose every instance as a performance counter group, 236 * (b) expose a single performance counter group that reports the sum over all 237 * instances, or 238 * (c) expose one performance counter group per instance, but summed over all 239 * shader engines. 240 */ 241 struct r600_perfcounter_block { 242 const char *basename; 243 unsigned flags; 244 unsigned num_counters; 245 unsigned num_selectors; 246 unsigned num_instances; 247 248 unsigned num_groups; 249 char *group_names; 250 unsigned group_name_stride; 251 252 char *selector_names; 253 unsigned selector_name_stride; 254 255 void *data; 256 }; 257 258 struct r600_perfcounters { 259 unsigned num_groups; 260 unsigned num_blocks; 261 struct r600_perfcounter_block *blocks; 262 263 unsigned num_start_cs_dwords; 264 unsigned num_stop_cs_dwords; 265 unsigned num_instance_cs_dwords; 266 unsigned num_shaders_cs_dwords; 267 268 unsigned num_shader_types; 269 const char * const *shader_type_suffixes; 270 const unsigned *shader_type_bits; 271 272 void (*get_size)(struct r600_perfcounter_block *, 273 unsigned count, unsigned *selectors, 274 unsigned *num_select_dw, unsigned *num_read_dw); 275 276 void (*emit_instance)(struct r600_common_context *, 277 int se, int instance); 278 void (*emit_shaders)(struct r600_common_context *, unsigned shaders); 279 void (*emit_select)(struct r600_common_context *, 280 struct r600_perfcounter_block *, 281 unsigned count, unsigned *selectors); 282 void (*emit_start)(struct r600_common_context *, 283 struct r600_resource *buffer, uint64_t va); 284 void (*emit_stop)(struct r600_common_context *, 285 struct r600_resource *buffer, uint64_t va); 286 void (*emit_read)(struct r600_common_context *, 287 struct r600_perfcounter_block *, 288 unsigned count, unsigned *selectors, 289 struct r600_resource *buffer, uint64_t va); 290 291 void (*cleanup)(struct si_screen *); 292 293 bool separate_se; 294 bool separate_instance; 295 }; 296 297 struct pipe_query *si_create_batch_query(struct pipe_context *ctx, 298 unsigned num_queries, 299 unsigned *query_types); 300 301 int si_get_perfcounter_info(struct si_screen *, 302 unsigned index, 303 struct pipe_driver_query_info *info); 304 int si_get_perfcounter_group_info(struct si_screen *, 305 unsigned index, 306 struct pipe_driver_query_group_info *info); 307 308 bool si_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks); 309 void si_perfcounters_add_block(struct si_screen *, 310 struct r600_perfcounters *, 311 const char *name, unsigned flags, 312 unsigned counters, unsigned selectors, 313 unsigned instances, void *data); 314 void si_perfcounters_do_destroy(struct r600_perfcounters *); 315 void si_query_hw_reset_buffers(struct r600_common_context *rctx, 316 struct r600_query_hw *query); 317 318 struct r600_qbo_state { 319 void *saved_compute; 320 struct pipe_constant_buffer saved_const0; 321 struct pipe_shader_buffer saved_ssbo[3]; 322 }; 323 324 #endif /* R600_QUERY_H */ 325