1 /* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Nicolai Hähnle <nicolai.haehnle@amd.com> 25 * 26 */ 27 28 #ifndef R600_QUERY_H 29 #define R600_QUERY_H 30 31 #include "pipe/p_defines.h" 32 #include "pipe/p_state.h" 33 #include "util/list.h" 34 35 struct pipe_context; 36 struct pipe_query; 37 struct pipe_resource; 38 39 struct r600_common_context; 40 struct r600_common_screen; 41 struct r600_query; 42 struct r600_query_hw; 43 struct r600_resource; 44 45 enum { 46 R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC, 47 R600_QUERY_SPILL_DRAW_CALLS, 48 R600_QUERY_COMPUTE_CALLS, 49 R600_QUERY_SPILL_COMPUTE_CALLS, 50 R600_QUERY_DMA_CALLS, 51 R600_QUERY_CP_DMA_CALLS, 52 R600_QUERY_NUM_VS_FLUSHES, 53 R600_QUERY_NUM_PS_FLUSHES, 54 R600_QUERY_NUM_CS_FLUSHES, 55 R600_QUERY_NUM_FB_CACHE_FLUSHES, 56 R600_QUERY_NUM_L2_INVALIDATES, 57 R600_QUERY_NUM_L2_WRITEBACKS, 58 R600_QUERY_REQUESTED_VRAM, 59 R600_QUERY_REQUESTED_GTT, 60 R600_QUERY_MAPPED_VRAM, 61 R600_QUERY_MAPPED_GTT, 62 R600_QUERY_BUFFER_WAIT_TIME, 63 R600_QUERY_NUM_GFX_IBS, 64 R600_QUERY_NUM_SDMA_IBS, 65 R600_QUERY_NUM_BYTES_MOVED, 66 R600_QUERY_NUM_EVICTIONS, 67 R600_QUERY_VRAM_USAGE, 68 R600_QUERY_GTT_USAGE, 69 R600_QUERY_GPU_TEMPERATURE, 70 R600_QUERY_CURRENT_GPU_SCLK, 71 R600_QUERY_CURRENT_GPU_MCLK, 72 R600_QUERY_GPU_LOAD, 73 R600_QUERY_GPU_SHADERS_BUSY, 74 R600_QUERY_NUM_COMPILATIONS, 75 R600_QUERY_NUM_SHADERS_CREATED, 76 R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO, 77 R600_QUERY_NUM_SHADER_CACHE_HITS, 78 R600_QUERY_GPIN_ASIC_ID, 79 R600_QUERY_GPIN_NUM_SIMD, 80 R600_QUERY_GPIN_NUM_RB, 81 R600_QUERY_GPIN_NUM_SPI, 82 R600_QUERY_GPIN_NUM_SE, 83 84 R600_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100, 85 }; 86 87 enum { 88 R600_QUERY_GROUP_GPIN = 0, 89 R600_NUM_SW_QUERY_GROUPS 90 }; 91 92 struct r600_query_ops { 93 void (*destroy)(struct r600_common_context *, struct r600_query *); 94 bool (*begin)(struct r600_common_context *, struct r600_query *); 95 bool (*end)(struct r600_common_context *, struct r600_query *); 96 bool (*get_result)(struct r600_common_context *, 97 struct r600_query *, bool wait, 98 union pipe_query_result *result); 99 void (*get_result_resource)(struct r600_common_context *, 100 struct r600_query *, bool wait, 101 enum pipe_query_value_type result_type, 102 int index, 103 struct pipe_resource *resource, 104 unsigned offset); 105 }; 106 107 struct r600_query { 108 struct r600_query_ops *ops; 109 110 /* The type of query */ 111 unsigned type; 112 }; 113 114 enum { 115 R600_QUERY_HW_FLAG_NO_START = (1 << 0), 116 /* gap */ 117 /* whether begin_query doesn't clear the result */ 118 R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2), 119 }; 120 121 struct r600_query_hw_ops { 122 bool (*prepare_buffer)(struct r600_common_context *, 123 struct r600_query_hw *, 124 struct r600_resource *); 125 void (*emit_start)(struct r600_common_context *, 126 struct r600_query_hw *, 127 struct r600_resource *buffer, uint64_t va); 128 void (*emit_stop)(struct r600_common_context *, 129 struct r600_query_hw *, 130 struct r600_resource *buffer, uint64_t va); 131 void (*clear_result)(struct r600_query_hw *, union pipe_query_result *); 132 void (*add_result)(struct r600_common_context *ctx, 133 struct r600_query_hw *, void *buffer, 134 union pipe_query_result *result); 135 }; 136 137 struct r600_query_buffer { 138 /* The buffer where query results are stored. */ 139 struct r600_resource *buf; 140 /* Offset of the next free result after current query data */ 141 unsigned results_end; 142 /* If a query buffer is full, a new buffer is created and the old one 143 * is put in here. When we calculate the result, we sum up the samples 144 * from all buffers. */ 145 struct r600_query_buffer *previous; 146 }; 147 148 struct r600_query_hw { 149 struct r600_query b; 150 struct r600_query_hw_ops *ops; 151 unsigned flags; 152 153 /* The query buffer and how many results are in it. */ 154 struct r600_query_buffer buffer; 155 /* Size of the result in memory for both begin_query and end_query, 156 * this can be one or two numbers, or it could even be a size of a structure. */ 157 unsigned result_size; 158 /* The number of dwords for begin_query or end_query. */ 159 unsigned num_cs_dw_begin; 160 unsigned num_cs_dw_end; 161 /* Linked list of queries */ 162 struct list_head list; 163 /* For transform feedback: which stream the query is for */ 164 unsigned stream; 165 }; 166 167 bool r600_query_hw_init(struct r600_common_context *rctx, 168 struct r600_query_hw *query); 169 void r600_query_hw_destroy(struct r600_common_context *rctx, 170 struct r600_query *rquery); 171 bool r600_query_hw_begin(struct r600_common_context *rctx, 172 struct r600_query *rquery); 173 bool r600_query_hw_end(struct r600_common_context *rctx, 174 struct r600_query *rquery); 175 bool r600_query_hw_get_result(struct r600_common_context *rctx, 176 struct r600_query *rquery, 177 bool wait, 178 union pipe_query_result *result); 179 180 /* Performance counters */ 181 enum { 182 /* This block is part of the shader engine */ 183 R600_PC_BLOCK_SE = (1 << 0), 184 185 /* Expose per-instance groups instead of summing all instances (within 186 * an SE). */ 187 R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1), 188 189 /* Expose per-SE groups instead of summing instances across SEs. */ 190 R600_PC_BLOCK_SE_GROUPS = (1 << 2), 191 192 /* Shader block */ 193 R600_PC_BLOCK_SHADER = (1 << 3), 194 195 /* Non-shader block with perfcounters windowed by shaders. */ 196 R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4), 197 }; 198 199 /* Describes a hardware block with performance counters. Multiple instances of 200 * each block, possibly per-SE, may exist on the chip. Depending on the block 201 * and on the user's configuration, we either 202 * (a) expose every instance as a performance counter group, 203 * (b) expose a single performance counter group that reports the sum over all 204 * instances, or 205 * (c) expose one performance counter group per instance, but summed over all 206 * shader engines. 207 */ 208 struct r600_perfcounter_block { 209 const char *basename; 210 unsigned flags; 211 unsigned num_counters; 212 unsigned num_selectors; 213 unsigned num_instances; 214 215 unsigned num_groups; 216 char *group_names; 217 unsigned group_name_stride; 218 219 char *selector_names; 220 unsigned selector_name_stride; 221 222 void *data; 223 }; 224 225 struct r600_perfcounters { 226 unsigned num_groups; 227 unsigned num_blocks; 228 struct r600_perfcounter_block *blocks; 229 230 unsigned num_start_cs_dwords; 231 unsigned num_stop_cs_dwords; 232 unsigned num_instance_cs_dwords; 233 unsigned num_shaders_cs_dwords; 234 235 unsigned num_shader_types; 236 const char * const *shader_type_suffixes; 237 const unsigned *shader_type_bits; 238 239 void (*get_size)(struct r600_perfcounter_block *, 240 unsigned count, unsigned *selectors, 241 unsigned *num_select_dw, unsigned *num_read_dw); 242 243 void (*emit_instance)(struct r600_common_context *, 244 int se, int instance); 245 void (*emit_shaders)(struct r600_common_context *, unsigned shaders); 246 void (*emit_select)(struct r600_common_context *, 247 struct r600_perfcounter_block *, 248 unsigned count, unsigned *selectors); 249 void (*emit_start)(struct r600_common_context *, 250 struct r600_resource *buffer, uint64_t va); 251 void (*emit_stop)(struct r600_common_context *, 252 struct r600_resource *buffer, uint64_t va); 253 void (*emit_read)(struct r600_common_context *, 254 struct r600_perfcounter_block *, 255 unsigned count, unsigned *selectors, 256 struct r600_resource *buffer, uint64_t va); 257 258 void (*cleanup)(struct r600_common_screen *); 259 260 bool separate_se; 261 bool separate_instance; 262 }; 263 264 struct pipe_query *r600_create_batch_query(struct pipe_context *ctx, 265 unsigned num_queries, 266 unsigned *query_types); 267 268 int r600_get_perfcounter_info(struct r600_common_screen *, 269 unsigned index, 270 struct pipe_driver_query_info *info); 271 int r600_get_perfcounter_group_info(struct r600_common_screen *, 272 unsigned index, 273 struct pipe_driver_query_group_info *info); 274 275 bool r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks); 276 void r600_perfcounters_add_block(struct r600_common_screen *, 277 struct r600_perfcounters *, 278 const char *name, unsigned flags, 279 unsigned counters, unsigned selectors, 280 unsigned instances, void *data); 281 void r600_perfcounters_do_destroy(struct r600_perfcounters *); 282 void r600_query_hw_reset_buffers(struct r600_common_context *rctx, 283 struct r600_query_hw *query); 284 285 struct r600_qbo_state { 286 void *saved_compute; 287 struct pipe_constant_buffer saved_const0; 288 struct pipe_shader_buffer saved_ssbo[3]; 289 }; 290 291 #endif /* R600_QUERY_H */ 292