1 /* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Nicolai Hähnle <nicolai.haehnle@amd.com> 25 * 26 */ 27 28 #ifndef R600_QUERY_H 29 #define R600_QUERY_H 30 31 #include "util/u_threaded_context.h" 32 33 struct pipe_context; 34 struct pipe_query; 35 struct pipe_resource; 36 37 struct r600_common_context; 38 struct r600_common_screen; 39 struct r600_query; 40 struct r600_query_hw; 41 struct r600_resource; 42 43 enum { 44 R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC, 45 R600_QUERY_DECOMPRESS_CALLS, 46 R600_QUERY_MRT_DRAW_CALLS, 47 R600_QUERY_PRIM_RESTART_CALLS, 48 R600_QUERY_SPILL_DRAW_CALLS, 49 R600_QUERY_COMPUTE_CALLS, 50 R600_QUERY_SPILL_COMPUTE_CALLS, 51 R600_QUERY_DMA_CALLS, 52 R600_QUERY_CP_DMA_CALLS, 53 R600_QUERY_NUM_VS_FLUSHES, 54 R600_QUERY_NUM_PS_FLUSHES, 55 R600_QUERY_NUM_CS_FLUSHES, 56 R600_QUERY_NUM_CB_CACHE_FLUSHES, 57 R600_QUERY_NUM_DB_CACHE_FLUSHES, 58 R600_QUERY_NUM_RESIDENT_HANDLES, 59 R600_QUERY_TC_OFFLOADED_SLOTS, 60 R600_QUERY_TC_DIRECT_SLOTS, 61 R600_QUERY_TC_NUM_SYNCS, 62 R600_QUERY_CS_THREAD_BUSY, 63 R600_QUERY_GALLIUM_THREAD_BUSY, 64 R600_QUERY_REQUESTED_VRAM, 65 R600_QUERY_REQUESTED_GTT, 66 R600_QUERY_MAPPED_VRAM, 67 R600_QUERY_MAPPED_GTT, 68 R600_QUERY_BUFFER_WAIT_TIME, 69 R600_QUERY_NUM_MAPPED_BUFFERS, 70 R600_QUERY_NUM_GFX_IBS, 71 R600_QUERY_NUM_SDMA_IBS, 72 R600_QUERY_GFX_BO_LIST_SIZE, 73 R600_QUERY_NUM_BYTES_MOVED, 74 R600_QUERY_NUM_EVICTIONS, 75 R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS, 76 R600_QUERY_VRAM_USAGE, 77 R600_QUERY_VRAM_VIS_USAGE, 78 R600_QUERY_GTT_USAGE, 79 R600_QUERY_GPU_TEMPERATURE, 80 R600_QUERY_CURRENT_GPU_SCLK, 81 R600_QUERY_CURRENT_GPU_MCLK, 82 R600_QUERY_GPU_LOAD, 83 R600_QUERY_GPU_SHADERS_BUSY, 84 R600_QUERY_GPU_TA_BUSY, 85 R600_QUERY_GPU_GDS_BUSY, 86 R600_QUERY_GPU_VGT_BUSY, 87 R600_QUERY_GPU_IA_BUSY, 88 R600_QUERY_GPU_SX_BUSY, 89 R600_QUERY_GPU_WD_BUSY, 90 R600_QUERY_GPU_BCI_BUSY, 91 R600_QUERY_GPU_SC_BUSY, 92 R600_QUERY_GPU_PA_BUSY, 93 R600_QUERY_GPU_DB_BUSY, 94 R600_QUERY_GPU_CP_BUSY, 95 R600_QUERY_GPU_CB_BUSY, 96 R600_QUERY_GPU_SDMA_BUSY, 97 R600_QUERY_GPU_PFP_BUSY, 98 R600_QUERY_GPU_MEQ_BUSY, 99 R600_QUERY_GPU_ME_BUSY, 100 R600_QUERY_GPU_SURF_SYNC_BUSY, 101 R600_QUERY_GPU_CP_DMA_BUSY, 102 R600_QUERY_GPU_SCRATCH_RAM_BUSY, 103 R600_QUERY_NUM_COMPILATIONS, 104 R600_QUERY_NUM_SHADERS_CREATED, 105 R600_QUERY_NUM_SHADER_CACHE_HITS, 106 R600_QUERY_GPIN_ASIC_ID, 107 R600_QUERY_GPIN_NUM_SIMD, 108 R600_QUERY_GPIN_NUM_RB, 109 R600_QUERY_GPIN_NUM_SPI, 110 R600_QUERY_GPIN_NUM_SE, 111 112 R600_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100, 113 }; 114 115 enum { 116 R600_QUERY_GROUP_GPIN = 0, 117 R600_NUM_SW_QUERY_GROUPS 118 }; 119 120 struct r600_query_ops { 121 void (*destroy)(struct r600_common_screen *, struct r600_query *); 122 bool (*begin)(struct r600_common_context *, struct r600_query *); 123 bool (*end)(struct r600_common_context *, struct r600_query *); 124 bool (*get_result)(struct r600_common_context *, 125 struct r600_query *, bool wait, 126 union pipe_query_result *result); 127 void (*get_result_resource)(struct r600_common_context *, 128 struct r600_query *, 129 enum pipe_query_flags flags, 130 enum pipe_query_value_type result_type, 131 int index, 132 struct pipe_resource *resource, 133 unsigned offset); 134 }; 135 136 struct r600_query { 137 struct threaded_query b; 138 struct r600_query_ops *ops; 139 140 /* The type of query */ 141 unsigned type; 142 }; 143 144 enum { 145 R600_QUERY_HW_FLAG_NO_START = (1 << 0), 146 /* gap */ 147 /* whether begin_query doesn't clear the result */ 148 R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2), 149 }; 150 151 struct r600_query_hw_ops { 152 bool (*prepare_buffer)(struct r600_common_screen *, 153 struct r600_query_hw *, 154 struct r600_resource *); 155 void (*emit_start)(struct r600_common_context *, 156 struct r600_query_hw *, 157 struct r600_resource *buffer, uint64_t va); 158 void (*emit_stop)(struct r600_common_context *, 159 struct r600_query_hw *, 160 struct r600_resource *buffer, uint64_t va); 161 void (*clear_result)(struct r600_query_hw *, union pipe_query_result *); 162 void (*add_result)(struct r600_common_screen *screen, 163 struct r600_query_hw *, void *buffer, 164 union pipe_query_result *result); 165 }; 166 167 struct r600_query_buffer { 168 /* The buffer where query results are stored. */ 169 struct r600_resource *buf; 170 /* Offset of the next free result after current query data */ 171 unsigned results_end; 172 /* If a query buffer is full, a new buffer is created and the old one 173 * is put in here. When we calculate the result, we sum up the samples 174 * from all buffers. */ 175 struct r600_query_buffer *previous; 176 }; 177 178 struct r600_query_hw { 179 struct r600_query b; 180 struct r600_query_hw_ops *ops; 181 unsigned flags; 182 183 /* The query buffer and how many results are in it. */ 184 struct r600_query_buffer buffer; 185 /* Size of the result in memory for both begin_query and end_query, 186 * this can be one or two numbers, or it could even be a size of a structure. */ 187 unsigned result_size; 188 /* The number of dwords for begin_query or end_query. */ 189 unsigned num_cs_dw_begin; 190 unsigned num_cs_dw_end; 191 /* Linked list of queries */ 192 struct list_head list; 193 /* For transform feedback: which stream the query is for */ 194 unsigned stream; 195 }; 196 197 bool r600_query_hw_init(struct r600_common_screen *rscreen, 198 struct r600_query_hw *query); 199 void r600_query_hw_destroy(struct r600_common_screen *rscreen, 200 struct r600_query *rquery); 201 bool r600_query_hw_begin(struct r600_common_context *rctx, 202 struct r600_query *rquery); 203 bool r600_query_hw_end(struct r600_common_context *rctx, 204 struct r600_query *rquery); 205 bool r600_query_hw_get_result(struct r600_common_context *rctx, 206 struct r600_query *rquery, 207 bool wait, 208 union pipe_query_result *result); 209 210 /* Performance counters */ 211 enum { 212 /* This block is part of the shader engine */ 213 R600_PC_BLOCK_SE = (1 << 0), 214 215 /* Expose per-instance groups instead of summing all instances (within 216 * an SE). */ 217 R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1), 218 219 /* Expose per-SE groups instead of summing instances across SEs. */ 220 R600_PC_BLOCK_SE_GROUPS = (1 << 2), 221 222 /* Shader block */ 223 R600_PC_BLOCK_SHADER = (1 << 3), 224 225 /* Non-shader block with perfcounters windowed by shaders. */ 226 R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4), 227 }; 228 229 /* Describes a hardware block with performance counters. Multiple instances of 230 * each block, possibly per-SE, may exist on the chip. Depending on the block 231 * and on the user's configuration, we either 232 * (a) expose every instance as a performance counter group, 233 * (b) expose a single performance counter group that reports the sum over all 234 * instances, or 235 * (c) expose one performance counter group per instance, but summed over all 236 * shader engines. 237 */ 238 struct r600_perfcounter_block { 239 const char *basename; 240 unsigned flags; 241 unsigned num_counters; 242 unsigned num_selectors; 243 unsigned num_instances; 244 245 unsigned num_groups; 246 char *group_names; 247 unsigned group_name_stride; 248 249 char *selector_names; 250 unsigned selector_name_stride; 251 252 void *data; 253 }; 254 255 struct r600_perfcounters { 256 unsigned num_groups; 257 unsigned num_blocks; 258 struct r600_perfcounter_block *blocks; 259 260 unsigned num_start_cs_dwords; 261 unsigned num_stop_cs_dwords; 262 unsigned num_instance_cs_dwords; 263 unsigned num_shaders_cs_dwords; 264 265 unsigned num_shader_types; 266 const char * const *shader_type_suffixes; 267 const unsigned *shader_type_bits; 268 269 void (*get_size)(struct r600_perfcounter_block *, 270 unsigned count, unsigned *selectors, 271 unsigned *num_select_dw, unsigned *num_read_dw); 272 273 void (*emit_instance)(struct r600_common_context *, 274 int se, int instance); 275 void (*emit_shaders)(struct r600_common_context *, unsigned shaders); 276 void (*emit_select)(struct r600_common_context *, 277 struct r600_perfcounter_block *, 278 unsigned count, unsigned *selectors); 279 void (*emit_start)(struct r600_common_context *, 280 struct r600_resource *buffer, uint64_t va); 281 void (*emit_stop)(struct r600_common_context *, 282 struct r600_resource *buffer, uint64_t va); 283 void (*emit_read)(struct r600_common_context *, 284 struct r600_perfcounter_block *, 285 unsigned count, unsigned *selectors, 286 struct r600_resource *buffer, uint64_t va); 287 288 void (*cleanup)(struct r600_common_screen *); 289 290 bool separate_se; 291 bool separate_instance; 292 }; 293 294 struct pipe_query *r600_create_batch_query(struct pipe_context *ctx, 295 unsigned num_queries, 296 unsigned *query_types); 297 298 int r600_get_perfcounter_info(struct r600_common_screen *, 299 unsigned index, 300 struct pipe_driver_query_info *info); 301 int r600_get_perfcounter_group_info(struct r600_common_screen *, 302 unsigned index, 303 struct pipe_driver_query_group_info *info); 304 305 bool r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks); 306 void r600_perfcounters_add_block(struct r600_common_screen *, 307 struct r600_perfcounters *, 308 const char *name, unsigned flags, 309 unsigned counters, unsigned selectors, 310 unsigned instances, void *data); 311 void r600_perfcounters_do_destroy(struct r600_perfcounters *); 312 void r600_query_hw_reset_buffers(struct r600_common_context *rctx, 313 struct r600_query_hw *query); 314 315 struct r600_qbo_state { 316 void *saved_compute; 317 struct pipe_constant_buffer saved_const0; 318 struct pipe_shader_buffer saved_ssbo[3]; 319 }; 320 321 #endif /* R600_QUERY_H */ 322