1 /*
2 * Copyright (C) 2013 Christoph Bumiller
3 * Copyright (C) 2015 Samuel Pitoiset
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * Performance monitoring counters interface to gallium.
26 */
27
28 #include "st_debug.h"
29 #include "st_context.h"
30 #include "st_cb_bitmap.h"
31 #include "st_cb_perfmon.h"
32
33 #include "util/bitset.h"
34
35 #include "pipe/p_context.h"
36 #include "pipe/p_screen.h"
37 #include "util/u_memory.h"
38
39 static bool
init_perf_monitor(struct gl_context * ctx,struct gl_perf_monitor_object * m)40 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
41 {
42 struct st_context *st = st_context(ctx);
43 struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
44 struct pipe_context *pipe = st->pipe;
45 unsigned *batch = NULL;
46 unsigned num_active_counters = 0;
47 unsigned max_batch_counters = 0;
48 unsigned num_batch_counters = 0;
49 int gid, cid;
50
51 st_flush_bitmap_cache(st);
52
53 /* Determine the number of active counters. */
54 for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
55 const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
56 const struct st_perf_monitor_group *stg = &st->perfmon[gid];
57
58 if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
59 /* Maximum number of counters reached. Cannot start the session. */
60 if (ST_DEBUG & DEBUG_MESA) {
61 debug_printf("Maximum number of counters reached. "
62 "Cannot start the session!\n");
63 }
64 return false;
65 }
66
67 num_active_counters += m->ActiveGroups[gid];
68 if (stg->has_batch)
69 max_batch_counters += m->ActiveGroups[gid];
70 }
71
72 if (!num_active_counters)
73 return true;
74
75 stm->active_counters = CALLOC(num_active_counters,
76 sizeof(*stm->active_counters));
77 if (!stm->active_counters)
78 return false;
79
80 if (max_batch_counters) {
81 batch = CALLOC(max_batch_counters, sizeof(*batch));
82 if (!batch)
83 return false;
84 }
85
86 /* Create a query for each active counter. */
87 for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
88 const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
89 const struct st_perf_monitor_group *stg = &st->perfmon[gid];
90 BITSET_WORD tmp;
91
92 BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
93 const struct st_perf_monitor_counter *stc = &stg->counters[cid];
94 struct st_perf_counter_object *cntr =
95 &stm->active_counters[stm->num_active_counters];
96
97 cntr->id = cid;
98 cntr->group_id = gid;
99 if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) {
100 cntr->batch_index = num_batch_counters;
101 batch[num_batch_counters++] = stc->query_type;
102 } else {
103 cntr->query = pipe->create_query(pipe, stc->query_type, 0);
104 if (!cntr->query)
105 goto fail;
106 }
107 ++stm->num_active_counters;
108 }
109 }
110
111 /* Create the batch query. */
112 if (num_batch_counters) {
113 stm->batch_query = pipe->create_batch_query(pipe, num_batch_counters,
114 batch);
115 stm->batch_result = CALLOC(num_batch_counters, sizeof(stm->batch_result->batch[0]));
116 if (!stm->batch_query || !stm->batch_result)
117 goto fail;
118 }
119
120 FREE(batch);
121 return true;
122
123 fail:
124 FREE(batch);
125 return false;
126 }
127
128 static void
reset_perf_monitor(struct st_perf_monitor_object * stm,struct pipe_context * pipe)129 reset_perf_monitor(struct st_perf_monitor_object *stm,
130 struct pipe_context *pipe)
131 {
132 unsigned i;
133
134 for (i = 0; i < stm->num_active_counters; ++i) {
135 struct pipe_query *query = stm->active_counters[i].query;
136 if (query)
137 pipe->destroy_query(pipe, query);
138 }
139 FREE(stm->active_counters);
140 stm->active_counters = NULL;
141 stm->num_active_counters = 0;
142
143 if (stm->batch_query) {
144 pipe->destroy_query(pipe, stm->batch_query);
145 stm->batch_query = NULL;
146 }
147 FREE(stm->batch_result);
148 stm->batch_result = NULL;
149 }
150
151 static struct gl_perf_monitor_object *
st_NewPerfMonitor(struct gl_context * ctx)152 st_NewPerfMonitor(struct gl_context *ctx)
153 {
154 struct st_perf_monitor_object *stq = ST_CALLOC_STRUCT(st_perf_monitor_object);
155 if (stq)
156 return &stq->base;
157 return NULL;
158 }
159
160 static void
st_DeletePerfMonitor(struct gl_context * ctx,struct gl_perf_monitor_object * m)161 st_DeletePerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
162 {
163 struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
164 struct pipe_context *pipe = st_context(ctx)->pipe;
165
166 reset_perf_monitor(stm, pipe);
167 FREE(stm);
168 }
169
170 static GLboolean
st_BeginPerfMonitor(struct gl_context * ctx,struct gl_perf_monitor_object * m)171 st_BeginPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
172 {
173 struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
174 struct pipe_context *pipe = st_context(ctx)->pipe;
175 unsigned i;
176
177 if (!stm->num_active_counters) {
178 /* Create a query for each active counter before starting
179 * a new monitoring session. */
180 if (!init_perf_monitor(ctx, m))
181 goto fail;
182 }
183
184 /* Start the query for each active counter. */
185 for (i = 0; i < stm->num_active_counters; ++i) {
186 struct pipe_query *query = stm->active_counters[i].query;
187 if (query && !pipe->begin_query(pipe, query))
188 goto fail;
189 }
190
191 if (stm->batch_query && !pipe->begin_query(pipe, stm->batch_query))
192 goto fail;
193
194 return true;
195
196 fail:
197 /* Failed to start the monitoring session. */
198 reset_perf_monitor(stm, pipe);
199 return false;
200 }
201
202 static void
st_EndPerfMonitor(struct gl_context * ctx,struct gl_perf_monitor_object * m)203 st_EndPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
204 {
205 struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
206 struct pipe_context *pipe = st_context(ctx)->pipe;
207 unsigned i;
208
209 /* Stop the query for each active counter. */
210 for (i = 0; i < stm->num_active_counters; ++i) {
211 struct pipe_query *query = stm->active_counters[i].query;
212 if (query)
213 pipe->end_query(pipe, query);
214 }
215
216 if (stm->batch_query)
217 pipe->end_query(pipe, stm->batch_query);
218 }
219
220 static void
st_ResetPerfMonitor(struct gl_context * ctx,struct gl_perf_monitor_object * m)221 st_ResetPerfMonitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
222 {
223 struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
224 struct pipe_context *pipe = st_context(ctx)->pipe;
225
226 if (!m->Ended)
227 st_EndPerfMonitor(ctx, m);
228
229 reset_perf_monitor(stm, pipe);
230
231 if (m->Active)
232 st_BeginPerfMonitor(ctx, m);
233 }
234
235 static GLboolean
st_IsPerfMonitorResultAvailable(struct gl_context * ctx,struct gl_perf_monitor_object * m)236 st_IsPerfMonitorResultAvailable(struct gl_context *ctx,
237 struct gl_perf_monitor_object *m)
238 {
239 struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
240 struct pipe_context *pipe = st_context(ctx)->pipe;
241 unsigned i;
242
243 if (!stm->num_active_counters)
244 return false;
245
246 /* The result of a monitoring session is only available if the query of
247 * each active counter is idle. */
248 for (i = 0; i < stm->num_active_counters; ++i) {
249 struct pipe_query *query = stm->active_counters[i].query;
250 union pipe_query_result result;
251 if (query && !pipe->get_query_result(pipe, query, FALSE, &result)) {
252 /* The query is busy. */
253 return false;
254 }
255 }
256
257 if (stm->batch_query &&
258 !pipe->get_query_result(pipe, stm->batch_query, FALSE, stm->batch_result))
259 return false;
260
261 return true;
262 }
263
264 static void
st_GetPerfMonitorResult(struct gl_context * ctx,struct gl_perf_monitor_object * m,GLsizei dataSize,GLuint * data,GLint * bytesWritten)265 st_GetPerfMonitorResult(struct gl_context *ctx,
266 struct gl_perf_monitor_object *m,
267 GLsizei dataSize,
268 GLuint *data,
269 GLint *bytesWritten)
270 {
271 struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
272 struct pipe_context *pipe = st_context(ctx)->pipe;
273 unsigned i;
274
275 /* Copy data to the supplied array (data).
276 *
277 * The output data format is: <group ID, counter ID, value> for each
278 * active counter. The API allows counters to appear in any order.
279 */
280 GLsizei offset = 0;
281 bool have_batch_query = false;
282
283 if (stm->batch_query)
284 have_batch_query = pipe->get_query_result(pipe, stm->batch_query, TRUE,
285 stm->batch_result);
286
287 /* Read query results for each active counter. */
288 for (i = 0; i < stm->num_active_counters; ++i) {
289 struct st_perf_counter_object *cntr = &stm->active_counters[i];
290 union pipe_query_result result = { 0 };
291 int gid, cid;
292 GLenum type;
293
294 cid = cntr->id;
295 gid = cntr->group_id;
296 type = ctx->PerfMonitor.Groups[gid].Counters[cid].Type;
297
298 if (cntr->query) {
299 if (!pipe->get_query_result(pipe, cntr->query, TRUE, &result))
300 continue;
301 } else {
302 if (!have_batch_query)
303 continue;
304 result.batch[0] = stm->batch_result->batch[cntr->batch_index];
305 }
306
307 data[offset++] = gid;
308 data[offset++] = cid;
309 switch (type) {
310 case GL_UNSIGNED_INT64_AMD:
311 memcpy(&data[offset], &result.u64, sizeof(uint64_t));
312 offset += sizeof(uint64_t) / sizeof(GLuint);
313 break;
314 case GL_UNSIGNED_INT:
315 memcpy(&data[offset], &result.u32, sizeof(uint32_t));
316 offset += sizeof(uint32_t) / sizeof(GLuint);
317 break;
318 case GL_FLOAT:
319 case GL_PERCENTAGE_AMD:
320 memcpy(&data[offset], &result.f, sizeof(GLfloat));
321 offset += sizeof(GLfloat) / sizeof(GLuint);
322 break;
323 }
324 }
325
326 if (bytesWritten)
327 *bytesWritten = offset * sizeof(GLuint);
328 }
329
330
331 bool
st_have_perfmon(struct st_context * st)332 st_have_perfmon(struct st_context *st)
333 {
334 struct pipe_screen *screen = st->pipe->screen;
335
336 if (!screen->get_driver_query_info || !screen->get_driver_query_group_info)
337 return false;
338
339 return screen->get_driver_query_group_info(screen, 0, NULL) != 0;
340 }
341
342 static void
st_InitPerfMonitorGroups(struct gl_context * ctx)343 st_InitPerfMonitorGroups(struct gl_context *ctx)
344 {
345 struct st_context *st = st_context(ctx);
346 struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor;
347 struct pipe_screen *screen = st->pipe->screen;
348 struct gl_perf_monitor_group *groups = NULL;
349 struct st_perf_monitor_group *stgroups = NULL;
350 int num_counters, num_groups;
351 int gid, cid;
352
353 /* Get the number of available queries. */
354 num_counters = screen->get_driver_query_info(screen, 0, NULL);
355
356 /* Get the number of available groups. */
357 num_groups = screen->get_driver_query_group_info(screen, 0, NULL);
358 groups = CALLOC(num_groups, sizeof(*groups));
359 if (!groups)
360 return;
361
362 stgroups = CALLOC(num_groups, sizeof(*stgroups));
363 if (!stgroups)
364 goto fail_only_groups;
365
366 for (gid = 0; gid < num_groups; gid++) {
367 struct gl_perf_monitor_group *g = &groups[perfmon->NumGroups];
368 struct st_perf_monitor_group *stg = &stgroups[perfmon->NumGroups];
369 struct pipe_driver_query_group_info group_info;
370 struct gl_perf_monitor_counter *counters = NULL;
371 struct st_perf_monitor_counter *stcounters = NULL;
372
373 if (!screen->get_driver_query_group_info(screen, gid, &group_info))
374 continue;
375
376 g->Name = group_info.name;
377 g->MaxActiveCounters = group_info.max_active_queries;
378
379 if (group_info.num_queries)
380 counters = CALLOC(group_info.num_queries, sizeof(*counters));
381 if (!counters)
382 goto fail;
383 g->Counters = counters;
384
385 stcounters = CALLOC(group_info.num_queries, sizeof(*stcounters));
386 if (!stcounters)
387 goto fail;
388 stg->counters = stcounters;
389
390 for (cid = 0; cid < num_counters; cid++) {
391 struct gl_perf_monitor_counter *c = &counters[g->NumCounters];
392 struct st_perf_monitor_counter *stc = &stcounters[g->NumCounters];
393 struct pipe_driver_query_info info;
394
395 if (!screen->get_driver_query_info(screen, cid, &info))
396 continue;
397 if (info.group_id != gid)
398 continue;
399
400 c->Name = info.name;
401 switch (info.type) {
402 case PIPE_DRIVER_QUERY_TYPE_UINT64:
403 case PIPE_DRIVER_QUERY_TYPE_BYTES:
404 case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
405 case PIPE_DRIVER_QUERY_TYPE_HZ:
406 c->Minimum.u64 = 0;
407 c->Maximum.u64 = info.max_value.u64 ? info.max_value.u64 : -1;
408 c->Type = GL_UNSIGNED_INT64_AMD;
409 break;
410 case PIPE_DRIVER_QUERY_TYPE_UINT:
411 c->Minimum.u32 = 0;
412 c->Maximum.u32 = info.max_value.u32 ? info.max_value.u32 : -1;
413 c->Type = GL_UNSIGNED_INT;
414 break;
415 case PIPE_DRIVER_QUERY_TYPE_FLOAT:
416 c->Minimum.f = 0.0;
417 c->Maximum.f = info.max_value.f ? info.max_value.f : -1;
418 c->Type = GL_FLOAT;
419 break;
420 case PIPE_DRIVER_QUERY_TYPE_PERCENTAGE:
421 c->Minimum.f = 0.0f;
422 c->Maximum.f = 100.0f;
423 c->Type = GL_PERCENTAGE_AMD;
424 break;
425 default:
426 unreachable("Invalid driver query type!");
427 }
428
429 stc->query_type = info.query_type;
430 stc->flags = info.flags;
431 if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH)
432 stg->has_batch = true;
433
434 g->NumCounters++;
435 }
436 perfmon->NumGroups++;
437 }
438 perfmon->Groups = groups;
439 st->perfmon = stgroups;
440
441 return;
442
443 fail:
444 for (gid = 0; gid < num_groups; gid++) {
445 FREE(stgroups[gid].counters);
446 FREE((void *)groups[gid].Counters);
447 }
448 FREE(stgroups);
449 fail_only_groups:
450 FREE(groups);
451 }
452
453 void
st_destroy_perfmon(struct st_context * st)454 st_destroy_perfmon(struct st_context *st)
455 {
456 struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor;
457 int gid;
458
459 for (gid = 0; gid < perfmon->NumGroups; gid++) {
460 FREE(st->perfmon[gid].counters);
461 FREE((void *)perfmon->Groups[gid].Counters);
462 }
463 FREE(st->perfmon);
464 FREE((void *)perfmon->Groups);
465 }
466
st_init_perfmon_functions(struct dd_function_table * functions)467 void st_init_perfmon_functions(struct dd_function_table *functions)
468 {
469 functions->InitPerfMonitorGroups = st_InitPerfMonitorGroups;
470 functions->NewPerfMonitor = st_NewPerfMonitor;
471 functions->DeletePerfMonitor = st_DeletePerfMonitor;
472 functions->BeginPerfMonitor = st_BeginPerfMonitor;
473 functions->EndPerfMonitor = st_EndPerfMonitor;
474 functions->ResetPerfMonitor = st_ResetPerfMonitor;
475 functions->IsPerfMonitorResultAvailable = st_IsPerfMonitorResultAvailable;
476 functions->GetPerfMonitorResult = st_GetPerfMonitorResult;
477 }
478