1 /* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Nicolai Hähnle <nicolai.haehnle@amd.com> 25 * 26 */ 27 28 #ifndef R600_QUERY_H 29 #define R600_QUERY_H 30 31 #include "util/u_threaded_context.h" 32 33 struct pipe_context; 34 struct pipe_query; 35 struct pipe_resource; 36 37 struct r600_common_context; 38 struct r600_common_screen; 39 struct r600_query; 40 struct r600_query_hw; 41 struct r600_resource; 42 43 enum { 44 R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC, 45 R600_QUERY_DECOMPRESS_CALLS, 46 R600_QUERY_MRT_DRAW_CALLS, 47 R600_QUERY_PRIM_RESTART_CALLS, 48 R600_QUERY_SPILL_DRAW_CALLS, 49 R600_QUERY_COMPUTE_CALLS, 50 R600_QUERY_SPILL_COMPUTE_CALLS, 51 R600_QUERY_DMA_CALLS, 52 R600_QUERY_CP_DMA_CALLS, 53 R600_QUERY_NUM_VS_FLUSHES, 54 R600_QUERY_NUM_PS_FLUSHES, 55 R600_QUERY_NUM_CS_FLUSHES, 56 R600_QUERY_NUM_CB_CACHE_FLUSHES, 57 R600_QUERY_NUM_DB_CACHE_FLUSHES, 58 R600_QUERY_NUM_RESIDENT_HANDLES, 59 R600_QUERY_TC_OFFLOADED_SLOTS, 60 R600_QUERY_TC_DIRECT_SLOTS, 61 R600_QUERY_TC_NUM_SYNCS, 62 R600_QUERY_CS_THREAD_BUSY, 63 R600_QUERY_GALLIUM_THREAD_BUSY, 64 R600_QUERY_REQUESTED_VRAM, 65 R600_QUERY_REQUESTED_GTT, 66 R600_QUERY_MAPPED_VRAM, 67 R600_QUERY_MAPPED_GTT, 68 R600_QUERY_BUFFER_WAIT_TIME, 69 R600_QUERY_NUM_MAPPED_BUFFERS, 70 R600_QUERY_NUM_GFX_IBS, 71 R600_QUERY_NUM_SDMA_IBS, 72 R600_QUERY_GFX_BO_LIST_SIZE, 73 R600_QUERY_NUM_BYTES_MOVED, 74 R600_QUERY_NUM_EVICTIONS, 75 R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS, 76 R600_QUERY_VRAM_USAGE, 77 R600_QUERY_VRAM_VIS_USAGE, 78 R600_QUERY_GTT_USAGE, 79 R600_QUERY_GPU_TEMPERATURE, 80 R600_QUERY_CURRENT_GPU_SCLK, 81 R600_QUERY_CURRENT_GPU_MCLK, 82 R600_QUERY_GPU_LOAD, 83 R600_QUERY_GPU_SHADERS_BUSY, 84 R600_QUERY_GPU_TA_BUSY, 85 R600_QUERY_GPU_GDS_BUSY, 86 R600_QUERY_GPU_VGT_BUSY, 87 R600_QUERY_GPU_IA_BUSY, 88 R600_QUERY_GPU_SX_BUSY, 89 R600_QUERY_GPU_WD_BUSY, 90 R600_QUERY_GPU_BCI_BUSY, 91 R600_QUERY_GPU_SC_BUSY, 92 R600_QUERY_GPU_PA_BUSY, 93 R600_QUERY_GPU_DB_BUSY, 94 R600_QUERY_GPU_CP_BUSY, 95 R600_QUERY_GPU_CB_BUSY, 96 R600_QUERY_GPU_SDMA_BUSY, 97 R600_QUERY_GPU_PFP_BUSY, 98 R600_QUERY_GPU_MEQ_BUSY, 99 R600_QUERY_GPU_ME_BUSY, 100 R600_QUERY_GPU_SURF_SYNC_BUSY, 101 R600_QUERY_GPU_CP_DMA_BUSY, 102 R600_QUERY_GPU_SCRATCH_RAM_BUSY, 103 R600_QUERY_NUM_COMPILATIONS, 104 R600_QUERY_NUM_SHADERS_CREATED, 105 R600_QUERY_NUM_SHADER_CACHE_HITS, 106 R600_QUERY_GPIN_ASIC_ID, 107 R600_QUERY_GPIN_NUM_SIMD, 108 R600_QUERY_GPIN_NUM_RB, 109 R600_QUERY_GPIN_NUM_SPI, 110 R600_QUERY_GPIN_NUM_SE, 111 112 R600_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100, 113 }; 114 115 enum { 116 R600_QUERY_GROUP_GPIN = 0, 117 R600_NUM_SW_QUERY_GROUPS 118 }; 119 120 struct r600_query_ops { 121 void (*destroy)(struct r600_common_screen *, struct r600_query *); 122 bool (*begin)(struct r600_common_context *, struct r600_query *); 123 bool (*end)(struct r600_common_context *, struct r600_query *); 124 bool (*get_result)(struct r600_common_context *, 125 struct r600_query *, bool wait, 126 union pipe_query_result *result); 127 void (*get_result_resource)(struct r600_common_context *, 128 struct r600_query *, bool wait, 129 enum pipe_query_value_type result_type, 130 int index, 131 struct pipe_resource *resource, 132 unsigned offset); 133 }; 134 135 struct r600_query { 136 struct threaded_query b; 137 struct r600_query_ops *ops; 138 139 /* The type of query */ 140 unsigned type; 141 }; 142 143 enum { 144 R600_QUERY_HW_FLAG_NO_START = (1 << 0), 145 /* gap */ 146 /* whether begin_query doesn't clear the result */ 147 R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2), 148 }; 149 150 struct r600_query_hw_ops { 151 bool (*prepare_buffer)(struct r600_common_screen *, 152 struct r600_query_hw *, 153 struct r600_resource *); 154 void (*emit_start)(struct r600_common_context *, 155 struct r600_query_hw *, 156 struct r600_resource *buffer, uint64_t va); 157 void (*emit_stop)(struct r600_common_context *, 158 struct r600_query_hw *, 159 struct r600_resource *buffer, uint64_t va); 160 void (*clear_result)(struct r600_query_hw *, union pipe_query_result *); 161 void (*add_result)(struct r600_common_screen *screen, 162 struct r600_query_hw *, void *buffer, 163 union pipe_query_result *result); 164 }; 165 166 struct r600_query_buffer { 167 /* The buffer where query results are stored. */ 168 struct r600_resource *buf; 169 /* Offset of the next free result after current query data */ 170 unsigned results_end; 171 /* If a query buffer is full, a new buffer is created and the old one 172 * is put in here. When we calculate the result, we sum up the samples 173 * from all buffers. */ 174 struct r600_query_buffer *previous; 175 }; 176 177 struct r600_query_hw { 178 struct r600_query b; 179 struct r600_query_hw_ops *ops; 180 unsigned flags; 181 182 /* The query buffer and how many results are in it. */ 183 struct r600_query_buffer buffer; 184 /* Size of the result in memory for both begin_query and end_query, 185 * this can be one or two numbers, or it could even be a size of a structure. */ 186 unsigned result_size; 187 /* The number of dwords for begin_query or end_query. */ 188 unsigned num_cs_dw_begin; 189 unsigned num_cs_dw_end; 190 /* Linked list of queries */ 191 struct list_head list; 192 /* For transform feedback: which stream the query is for */ 193 unsigned stream; 194 }; 195 196 bool r600_query_hw_init(struct r600_common_screen *rscreen, 197 struct r600_query_hw *query); 198 void r600_query_hw_destroy(struct r600_common_screen *rscreen, 199 struct r600_query *rquery); 200 bool r600_query_hw_begin(struct r600_common_context *rctx, 201 struct r600_query *rquery); 202 bool r600_query_hw_end(struct r600_common_context *rctx, 203 struct r600_query *rquery); 204 bool r600_query_hw_get_result(struct r600_common_context *rctx, 205 struct r600_query *rquery, 206 bool wait, 207 union pipe_query_result *result); 208 209 /* Performance counters */ 210 enum { 211 /* This block is part of the shader engine */ 212 R600_PC_BLOCK_SE = (1 << 0), 213 214 /* Expose per-instance groups instead of summing all instances (within 215 * an SE). */ 216 R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1), 217 218 /* Expose per-SE groups instead of summing instances across SEs. */ 219 R600_PC_BLOCK_SE_GROUPS = (1 << 2), 220 221 /* Shader block */ 222 R600_PC_BLOCK_SHADER = (1 << 3), 223 224 /* Non-shader block with perfcounters windowed by shaders. */ 225 R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4), 226 }; 227 228 /* Describes a hardware block with performance counters. Multiple instances of 229 * each block, possibly per-SE, may exist on the chip. Depending on the block 230 * and on the user's configuration, we either 231 * (a) expose every instance as a performance counter group, 232 * (b) expose a single performance counter group that reports the sum over all 233 * instances, or 234 * (c) expose one performance counter group per instance, but summed over all 235 * shader engines. 236 */ 237 struct r600_perfcounter_block { 238 const char *basename; 239 unsigned flags; 240 unsigned num_counters; 241 unsigned num_selectors; 242 unsigned num_instances; 243 244 unsigned num_groups; 245 char *group_names; 246 unsigned group_name_stride; 247 248 char *selector_names; 249 unsigned selector_name_stride; 250 251 void *data; 252 }; 253 254 struct r600_perfcounters { 255 unsigned num_groups; 256 unsigned num_blocks; 257 struct r600_perfcounter_block *blocks; 258 259 unsigned num_start_cs_dwords; 260 unsigned num_stop_cs_dwords; 261 unsigned num_instance_cs_dwords; 262 unsigned num_shaders_cs_dwords; 263 264 unsigned num_shader_types; 265 const char * const *shader_type_suffixes; 266 const unsigned *shader_type_bits; 267 268 void (*get_size)(struct r600_perfcounter_block *, 269 unsigned count, unsigned *selectors, 270 unsigned *num_select_dw, unsigned *num_read_dw); 271 272 void (*emit_instance)(struct r600_common_context *, 273 int se, int instance); 274 void (*emit_shaders)(struct r600_common_context *, unsigned shaders); 275 void (*emit_select)(struct r600_common_context *, 276 struct r600_perfcounter_block *, 277 unsigned count, unsigned *selectors); 278 void (*emit_start)(struct r600_common_context *, 279 struct r600_resource *buffer, uint64_t va); 280 void (*emit_stop)(struct r600_common_context *, 281 struct r600_resource *buffer, uint64_t va); 282 void (*emit_read)(struct r600_common_context *, 283 struct r600_perfcounter_block *, 284 unsigned count, unsigned *selectors, 285 struct r600_resource *buffer, uint64_t va); 286 287 void (*cleanup)(struct r600_common_screen *); 288 289 bool separate_se; 290 bool separate_instance; 291 }; 292 293 struct pipe_query *r600_create_batch_query(struct pipe_context *ctx, 294 unsigned num_queries, 295 unsigned *query_types); 296 297 int r600_get_perfcounter_info(struct r600_common_screen *, 298 unsigned index, 299 struct pipe_driver_query_info *info); 300 int r600_get_perfcounter_group_info(struct r600_common_screen *, 301 unsigned index, 302 struct pipe_driver_query_group_info *info); 303 304 bool r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks); 305 void r600_perfcounters_add_block(struct r600_common_screen *, 306 struct r600_perfcounters *, 307 const char *name, unsigned flags, 308 unsigned counters, unsigned selectors, 309 unsigned instances, void *data); 310 void r600_perfcounters_do_destroy(struct r600_perfcounters *); 311 void r600_query_hw_reset_buffers(struct r600_common_context *rctx, 312 struct r600_query_hw *query); 313 314 struct r600_qbo_state { 315 void *saved_compute; 316 struct pipe_constant_buffer saved_const0; 317 struct pipe_shader_buffer saved_ssbo[3]; 318 }; 319 320 #endif /* R600_QUERY_H */ 321