• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
3  * Copyright © 2018 Google, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  * Authors:
25  *    Rob Clark <robclark@freedesktop.org>
26  */
27 
28 /* NOTE: see https://github.com/freedreno/freedreno/wiki/A5xx-Queries */
29 
30 #include "freedreno_query_acc.h"
31 #include "freedreno_resource.h"
32 
33 #include "fd6_context.h"
34 #include "fd6_emit.h"
35 #include "fd6_format.h"
36 #include "fd6_query.h"
37 
/* Layout of one query sample in the GPU-visible backing BO.  The GPU
 * snapshots counters into 'start'/'stop' and accumulates the delta
 * (stop - start) into 'result' via CP_MEM_TO_MEM.
 */
struct PACKED fd6_query_sample {
	uint64_t start;    /* counter snapshot taken at query resume */
	uint64_t result;   /* running sum of (stop - start) deltas */
	uint64_t stop;     /* counter snapshot taken at query pause */
};
43 
/* offset of a single field of an array of fd6_query_sample: expands to
 * the (bo, offset, or, shift) argument quadruple expected by OUT_RELOC().
 * Note: 'idx' is parenthesized so expression arguments (e.g. "i + 1")
 * aren't broken by operator precedence inside the multiplication.
 */
#define query_sample_idx(aq, idx, field)        \
	fd_resource((aq)->prsc)->bo,                \
	((idx) * sizeof(struct fd6_query_sample)) + \
	offsetof(struct fd6_query_sample, field),   \
	0, 0

/* offset of a single field of fd6_query_sample: */
#define query_sample(aq, field)                 \
	query_sample_idx(aq, 0, field)
54 
55 /*
56  * Occlusion Query:
57  *
58  * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
59  * interpret results
60  */
61 
/* Start (or restart after a batch break) an occlusion query: point the
 * RB sample-count copy destination at the 'start' field and kick a
 * ZPASS_DONE event so the HW writes the current sample count there.
 */
static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	/* enable copy-out of the sample counter on ZPASS_DONE: */
	OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
	OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

	OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
	OUT_RELOC(ring, query_sample(aq, start));

	fd6_event_write(batch, ring, ZPASS_DONE, false);

	/* bookkeeping for the number of in-flight samples-passed queries: */
	fd6_context(batch->ctx)->samples_passed_queries++;
}
77 
/* Pause an occlusion query: snapshot the current sample count into
 * 'stop' and accumulate (stop - start) into 'result' from the epilogue
 * ring, so the draw ring never stalls waiting on the counter write.
 */
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	/* Pre-fill 'stop' with an all-ones sentinel via the CP, presumably so
	 * it is distinguishable from a landed ZPASS_DONE write -- TODO confirm
	 * against the consumer of this field.
	 */
	OUT_PKT7(ring, CP_MEM_WRITE, 4);
	OUT_RELOC(ring, query_sample(aq, stop));
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0xffffffff);

	OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

	/* snapshot the current sample count into 'stop': */
	OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
	OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

	OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
	OUT_RELOC(ring, query_sample(aq, stop));

	fd6_event_write(batch, ring, ZPASS_DONE, false);

	/* To avoid stalling in the draw buffer, emit the code to compute the
	 * counter delta in the epilogue ring.
	 */
	struct fd_ringbuffer *epilogue = fd_batch_get_epilogue(batch);
	fd_wfi(batch, epilogue);

	/* result += stop - start: */
	OUT_PKT7(epilogue, CP_MEM_TO_MEM, 9);
	OUT_RING(epilogue, CP_MEM_TO_MEM_0_DOUBLE |
			CP_MEM_TO_MEM_0_NEG_C);
	OUT_RELOC(epilogue, query_sample(aq, result));      /* dst */
	OUT_RELOC(epilogue, query_sample(aq, result));      /* srcA */
	OUT_RELOC(epilogue, query_sample(aq, stop));        /* srcB */
	OUT_RELOC(epilogue, query_sample(aq, start));       /* srcC */

	fd6_context(batch->ctx)->samples_passed_queries--;
}
115 
116 static void
occlusion_counter_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)117 occlusion_counter_result(struct fd_acc_query *aq, void *buf,
118 		union pipe_query_result *result)
119 {
120 	struct fd6_query_sample *sp = buf;
121 	result->u64 = sp->result;
122 }
123 
124 static void
occlusion_predicate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)125 occlusion_predicate_result(struct fd_acc_query *aq, void *buf,
126 		union pipe_query_result *result)
127 {
128 	struct fd6_query_sample *sp = buf;
129 	result->b = !!sp->result;
130 }
131 
/* PIPE_QUERY_OCCLUSION_COUNTER: reports the raw passed-sample count. */
static const struct fd_acc_sample_provider occlusion_counter = {
		.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
		.size = sizeof(struct fd6_query_sample),
		.resume = occlusion_resume,
		.pause = occlusion_pause,
		.result = occlusion_counter_result,
};
139 
/* PIPE_QUERY_OCCLUSION_PREDICATE: same sampling as the counter query,
 * only the result interpretation (boolean) differs.
 */
static const struct fd_acc_sample_provider occlusion_predicate = {
		.query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
		.size = sizeof(struct fd6_query_sample),
		.resume = occlusion_resume,
		.pause = occlusion_pause,
		.result = occlusion_predicate_result,
};
147 
/* PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: implemented identically
 * to the precise predicate (a precise answer is a valid conservative one).
 */
static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
		.query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
		.size = sizeof(struct fd6_query_sample),
		.resume = occlusion_resume,
		.pause = occlusion_pause,
		.result = occlusion_predicate_result,
};
155 
156 /*
157  * Timestamp Queries:
158  */
159 
/* Capture a GPU timestamp into the 'start' field via a timestamped
 * RB_DONE_TS event.  Shared by both TIMESTAMP and TIME_ELAPSED queries.
 */
static void
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
			CP_EVENT_WRITE_0_TIMESTAMP);
	OUT_RELOC(ring, query_sample(aq, start));
	OUT_RING(ring, 0x00000000);

	fd_reset_wfi(batch);
}
173 
/* Pause a TIME_ELAPSED query: capture the 'stop' timestamp and then
 * accumulate the delta into 'result' on the GPU.
 */
static void
time_elapsed_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
			CP_EVENT_WRITE_0_TIMESTAMP);
	OUT_RELOC(ring, query_sample(aq, stop));
	OUT_RING(ring, 0x00000000);

	/* wait for the timestamp write to land before reading it back: */
	fd_reset_wfi(batch);
	fd_wfi(batch, ring);

	/* result += stop - start: */
	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
			CP_MEM_TO_MEM_0_NEG_C);
	OUT_RELOC(ring, query_sample(aq, result));      /* dst */
	OUT_RELOC(ring, query_sample(aq, result));      /* srcA */
	OUT_RELOC(ring, query_sample(aq, stop));        /* srcB */
	OUT_RELOC(ring, query_sample(aq, start));       /* srcC */
}
197 
/* Pause hook for TIMESTAMP queries: intentionally a no-op. */
static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	/* We captured a timestamp in timestamp_resume(), nothing to do here. */
}
203 
/* timestamp logging for fd_log(): write a raw GPU timestamp to an
 * arbitrary bo+offset (not tied to a query sample buffer).
 */
static void
record_timestamp(struct fd_ringbuffer *ring, struct fd_bo *bo, unsigned offset)
{
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
			CP_EVENT_WRITE_0_TIMESTAMP);
	OUT_RELOC(ring, bo, offset, 0, 0);
	OUT_RING(ring, 0x00000000);
}
214 
/* Convert always-on timer ticks to nanoseconds.
 *
 * This is based on the 19.2MHz always-on rbbm timer.  Note the factor
 * is computed with integer division, i.e. 52 ns/tick.
 *
 * TODO we should probably query this value from kernel..
 */
static uint64_t
ticks_to_ns(uint64_t ts)
{
	const uint64_t ns_per_tick = 1000000000 / 19200000;

	return ts * ns_per_tick;
}
224 
225 static void
time_elapsed_accumulate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)226 time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf,
227 		union pipe_query_result *result)
228 {
229 	struct fd6_query_sample *sp = buf;
230 	result->u64 = ticks_to_ns(sp->result);
231 }
232 
233 static void
timestamp_accumulate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)234 timestamp_accumulate_result(struct fd_acc_query *aq, void *buf,
235 		union pipe_query_result *result)
236 {
237 	struct fd6_query_sample *sp = buf;
238 	result->u64 = ticks_to_ns(sp->start);
239 }
240 
/* PIPE_QUERY_TIME_ELAPSED: .always so the sample is taken even when the
 * batch has no draws.
 */
static const struct fd_acc_sample_provider time_elapsed = {
		.query_type = PIPE_QUERY_TIME_ELAPSED,
		.always = true,
		.size = sizeof(struct fd6_query_sample),
		.resume = timestamp_resume,
		.pause = time_elapsed_pause,
		.result = time_elapsed_accumulate_result,
};
249 
250 /* NOTE: timestamp query isn't going to give terribly sensible results
251  * on a tiler.  But it is needed by qapitrace profile heatmap.  If you
252  * add in a binning pass, the results get even more non-sensical.  So
253  * we just return the timestamp on the last tile and hope that is
254  * kind of good enough.
255  */
256 
/* PIPE_QUERY_TIMESTAMP: resume captures the timestamp; pause is a no-op
 * (see the note above about tiler semantics).
 */
static const struct fd_acc_sample_provider timestamp = {
		.query_type = PIPE_QUERY_TIMESTAMP,
		.always = true,
		.size = sizeof(struct fd6_query_sample),
		.resume = timestamp_resume,
		.pause = timestamp_pause,
		.result = timestamp_accumulate_result,
};
265 
/* Sample buffer layout for primitives-generated/emitted queries.
 * start[]/stop[] are per-stream VPC_SO_STREAM_COUNTS snapshots (4 SO
 * streams), 'result' accumulates the deltas.  prim_start/prim_stop are
 * RBBM_PRIMCTR snapshots -- 16 slots to cover the debug configuration;
 * the non-debug build snapshots a single counter (see counter_base).
 */
struct PACKED fd6_primitives_sample {
	struct {
		uint64_t emitted, generated;
	} start[4], stop[4], result;

	uint64_t prim_start[16], prim_stop[16], prim_emitted;
};
273 
274 
/* Emit a reloc to 'field' within the query's fd6_primitives_sample
 * buffer.  Both variants currently expand identically; the 'w' suffix
 * marks write-relocs by naming convention only.
 * NOTE(review): each expansion carries a trailing ';', so call sites can
 * legally (but confusingly) omit their own semicolon.
 */
#define primitives_relocw(ring, aq, field) \
	OUT_RELOC(ring, fd_resource((aq)->prsc)->bo, offsetof(struct fd6_primitives_sample, field), 0, 0);
#define primitives_reloc(ring, aq, field) \
	OUT_RELOC(ring, fd_resource((aq)->prsc)->bo, offsetof(struct fd6_primitives_sample, field), 0, 0);
279 
280 #ifdef DEBUG_COUNTERS
281 static const unsigned counter_count = 10;
282 static const unsigned counter_base = REG_A6XX_RBBM_PRIMCTR_0_LO;
283 
284 static void
log_counters(struct fd6_primitives_sample * ps)285 log_counters(struct fd6_primitives_sample *ps)
286 {
287 	const char *labels[] = {
288 		"vs_vertices_in",
289 		"vs_primitives_out",
290 		"hs_vertices_in",
291 		"hs_patches_out",
292 		"ds_vertices_in",
293 		"ds_primitives_out",
294 		"gs_primitives_in",
295 		"gs_primitives_out",
296 		"ras_primitives_in",
297 		"x",
298 	};
299 
300 	printf("  counter\t\tstart\t\t\tstop\t\t\tdiff\n");
301 	for (int i = 0; i < counter_count; i++) {
302 		printf("  RBBM_PRIMCTR_%d\t0x%016llx\t0x%016llx\t%lld\t%s\n",
303 				i + (counter_base - REG_A6XX_RBBM_PRIMCTR_0_LO) / 2,
304 				ps->prim_start[i], ps->prim_stop[i], ps->prim_stop[i] - ps->prim_start[i], labels[i]);
305 	}
306 
307 	printf("  so counts\n");
308 	for (int i = 0; i < ARRAY_SIZE(ps->start); i++) {
309 		printf("  CHANNEL %d emitted\t0x%016llx\t0x%016llx\t%lld\n",
310 				i, ps->start[i].generated, ps->stop[i].generated, ps->stop[i].generated - ps->start[i].generated);
311 		printf("  CHANNEL %d generated\t0x%016llx\t0x%016llx\t%lld\n",
312 				i, ps->start[i].emitted, ps->stop[i].emitted, ps->stop[i].emitted - ps->start[i].emitted);
313 	}
314 
315 	printf("generated %lld, emitted %lld\n", ps->result.generated, ps->result.emitted);
316 }
317 
318 #else
319 
320 static const unsigned counter_count = 1;
321 static const unsigned counter_base = REG_A6XX_RBBM_PRIMCTR_8_LO;
322 
/* No-op stub when DEBUG_COUNTERS is disabled. */
static void
log_counters(struct fd6_primitives_sample *ps)
{
}
327 
328 #endif
329 
/* Start a PRIMITIVES_GENERATED query: snapshot the RBBM primitive
 * counters into prim_start and enable counting.
 */
static void
primitives_generated_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	fd_wfi(batch, ring);

	/* copy counter_count 64b counters starting at counter_base to memory: */
	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
	OUT_RING(ring, CP_REG_TO_MEM_0_64B |
			CP_REG_TO_MEM_0_CNT(counter_count) |
			CP_REG_TO_MEM_0_REG(counter_base));
	primitives_relocw(ring, aq, prim_start);

	fd6_event_write(batch, ring, START_PRIMITIVE_CTRS, false);
}
345 
346 static void
primitives_generated_pause(struct fd_acc_query * aq,struct fd_batch * batch)347 primitives_generated_pause(struct fd_acc_query *aq, struct fd_batch *batch)
348 {
349 	struct fd_ringbuffer *ring = batch->draw;
350 
351 	fd_wfi(batch, ring);
352 
353 	/* snapshot the end values: */
354 	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
355 	OUT_RING(ring, CP_REG_TO_MEM_0_64B |
356 			CP_REG_TO_MEM_0_CNT(counter_count) |
357 			CP_REG_TO_MEM_0_REG(counter_base));
358 	primitives_relocw(ring, aq, prim_stop);
359 
360 	fd6_event_write(batch, ring, STOP_PRIMITIVE_CTRS, false);
361 
362 	/* result += stop - start: */
363 	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
364 	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
365 			CP_MEM_TO_MEM_0_NEG_C | 0x40000000);
366 	primitives_relocw(ring, aq, result.generated);
367 	primitives_reloc(ring, aq, prim_emitted);
368 	primitives_reloc(ring, aq, prim_stop[(REG_A6XX_RBBM_PRIMCTR_8_LO - counter_base) / 2])
369 		primitives_reloc(ring, aq, prim_start[(REG_A6XX_RBBM_PRIMCTR_8_LO - counter_base) / 2]);
370 }
371 
372 static void
primitives_generated_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)373 primitives_generated_result(struct fd_acc_query *aq, void *buf,
374 		union pipe_query_result *result)
375 {
376 	struct fd6_primitives_sample *ps = buf;
377 
378 	log_counters(ps);
379 
380 	result->u64 = ps->result.generated;
381 }
382 
/* PIPE_QUERY_PRIMITIVES_GENERATED: backed by the RBBM primitive counters. */
static const struct fd_acc_sample_provider primitives_generated = {
	.query_type = PIPE_QUERY_PRIMITIVES_GENERATED,
	.size = sizeof(struct fd6_primitives_sample),
	.resume = primitives_generated_resume,
	.pause = primitives_generated_pause,
	.result = primitives_generated_result,
};
390 
/* Start a PRIMITIVES_EMITTED query: point VPC_SO_STREAM_COUNTS at the
 * start[] array and trigger a write of the current SO counts.
 */
static void
primitives_emitted_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	fd_wfi(batch, ring);
	OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS_LO, 2);
	primitives_relocw(ring, aq, start[0]);

	fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false);
}
402 
403 static void
primitives_emitted_pause(struct fd_acc_query * aq,struct fd_batch * batch)404 primitives_emitted_pause(struct fd_acc_query *aq, struct fd_batch *batch)
405 {
406 	struct fd_ringbuffer *ring = batch->draw;
407 
408 	fd_wfi(batch, ring);
409 
410 	OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS_LO, 2);
411 	primitives_relocw(ring, aq, stop[0]);
412 	fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false);
413 
414 	fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);
415 
416 	/* result += stop - start: */
417 	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
418 	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
419 			CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
420 	primitives_relocw(ring, aq, result.emitted);
421 	primitives_reloc(ring, aq, result.emitted);
422 	primitives_reloc(ring, aq, stop[aq->base.index].emitted);
423 	primitives_reloc(ring, aq, start[aq->base.index].emitted);
424 }
425 
426 static void
primitives_emitted_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)427 primitives_emitted_result(struct fd_acc_query *aq, void *buf,
428 		union pipe_query_result *result)
429 {
430 	struct fd6_primitives_sample *ps = buf;
431 
432 	log_counters(ps);
433 
434 	result->u64 = ps->result.emitted;
435 }
436 
/* PIPE_QUERY_PRIMITIVES_EMITTED: backed by the VPC SO stream counts. */
static const struct fd_acc_sample_provider primitives_emitted = {
	.query_type = PIPE_QUERY_PRIMITIVES_EMITTED,
	.size = sizeof(struct fd6_primitives_sample),
	.resume = primitives_emitted_resume,
	.pause = primitives_emitted_pause,
	.result = primitives_emitted_result,
};
444 
445 /*
446  * Performance Counter (batch) queries:
447  *
448  * Only one of these is active at a time, per design of the gallium
449  * batch_query API design.  On perfcntr query tracks N query_types,
450  * each of which has a 'fd_batch_query_entry' that maps it back to
451  * the associated group and counter.
452  */
453 
/* Maps one tracked query_type back to its perfcntr group and countable. */
struct fd_batch_query_entry {
	uint8_t gid;        /* group-id */
	uint8_t cid;        /* countable-id within the group */
};
458 
/* Per-query private data for a perfcntr batch query; owns one entry per
 * requested query_type (flexible array member, sized at allocation).
 */
struct fd_batch_query_data {
	struct fd_screen *screen;
	unsigned num_query_entries;
	struct fd_batch_query_entry query_entries[];
};
464 
/* Start a perfcntr batch query: program each requested countable's
 * select register, then snapshot the starting counter values.
 */
static void
perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_batch_query_data *data = aq->query_data;
	struct fd_screen *screen = data->screen;
	struct fd_ringbuffer *ring = batch->draw;

	/* tracks how many physical counters each group has handed out, so
	 * every entry gets its own counter within its group:
	 */
	unsigned counters_per_group[screen->num_perfcntr_groups];
	memset(counters_per_group, 0, sizeof(counters_per_group));

	fd_wfi(batch, ring);

	/* configure performance counters for the requested queries: */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		struct fd_batch_query_entry *entry = &data->query_entries[i];
		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
		unsigned counter_idx = counters_per_group[entry->gid]++;

		/* fd6_create_batch_query() validated per-group counts, so this
		 * cannot overflow the group:
		 */
		debug_assert(counter_idx < g->num_counters);

		OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
		OUT_RING(ring, g->countables[entry->cid].selector);
	}

	/* reset so the second pass assigns the same counter indices: */
	memset(counters_per_group, 0, sizeof(counters_per_group));

	/* and snapshot the start values */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		struct fd_batch_query_entry *entry = &data->query_entries[i];
		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
		unsigned counter_idx = counters_per_group[entry->gid]++;
		const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

		OUT_PKT7(ring, CP_REG_TO_MEM, 3);
		OUT_RING(ring, CP_REG_TO_MEM_0_64B |
			CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
		OUT_RELOC(ring, query_sample_idx(aq, i, start));
	}
}
504 
/* Pause a perfcntr batch query: snapshot the end counter values and
 * accumulate per-entry deltas into each sample's 'result'.
 */
static void
perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_batch_query_data *data = aq->query_data;
	struct fd_screen *screen = data->screen;
	struct fd_ringbuffer *ring = batch->draw;

	/* re-derive the same per-group counter assignment as resume: */
	unsigned counters_per_group[screen->num_perfcntr_groups];
	memset(counters_per_group, 0, sizeof(counters_per_group));

	fd_wfi(batch, ring);

	/* TODO do we need to bother to turn anything off? */

	/* snapshot the end values: */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		struct fd_batch_query_entry *entry = &data->query_entries[i];
		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
		unsigned counter_idx = counters_per_group[entry->gid]++;
		const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

		OUT_PKT7(ring, CP_REG_TO_MEM, 3);
		OUT_RING(ring, CP_REG_TO_MEM_0_64B |
			CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
		OUT_RELOC(ring, query_sample_idx(aq, i, stop));
	}

	/* and compute the result: */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		/* result += stop - start: */
		OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
		OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
				CP_MEM_TO_MEM_0_NEG_C);
		OUT_RELOC(ring, query_sample_idx(aq, i, result));      /* dst */
		OUT_RELOC(ring, query_sample_idx(aq, i, result));      /* srcA */
		OUT_RELOC(ring, query_sample_idx(aq, i, stop));        /* srcB */
		OUT_RELOC(ring, query_sample_idx(aq, i, start));       /* srcC */
	}
}
544 
545 static void
perfcntr_accumulate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)546 perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
547 		union pipe_query_result *result)
548 {
549 	struct fd_batch_query_data *data = aq->query_data;
550 	struct fd6_query_sample *sp = buf;
551 
552 	for (unsigned i = 0; i < data->num_query_entries; i++) {
553 		result->batch[i].u64 = sp[i].result;
554 	}
555 }
556 
/* Provider for perfcntr batch queries; .size is left 0 because the
 * sample-buffer size depends on the number of tracked query_types and
 * is set per-query in fd6_create_batch_query().
 */
static const struct fd_acc_sample_provider perfcntr = {
		.query_type = FD_QUERY_FIRST_PERFCNTR,
		.always = true,
		.resume = perfcntr_resume,
		.pause = perfcntr_pause,
		.result = perfcntr_accumulate_result,
};
564 
565 static struct pipe_query *
fd6_create_batch_query(struct pipe_context * pctx,unsigned num_queries,unsigned * query_types)566 fd6_create_batch_query(struct pipe_context *pctx,
567 		unsigned num_queries, unsigned *query_types)
568 {
569 	struct fd_context *ctx = fd_context(pctx);
570 	struct fd_screen *screen = ctx->screen;
571 	struct fd_query *q;
572 	struct fd_acc_query *aq;
573 	struct fd_batch_query_data *data;
574 
575 	data = CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data,
576 			num_queries * sizeof(data->query_entries[0]));
577 
578 	data->screen = screen;
579 	data->num_query_entries = num_queries;
580 
581 	/* validate the requested query_types and ensure we don't try
582 	 * to request more query_types of a given group than we have
583 	 * counters:
584 	 */
585 	unsigned counters_per_group[screen->num_perfcntr_groups];
586 	memset(counters_per_group, 0, sizeof(counters_per_group));
587 
588 	for (unsigned i = 0; i < num_queries; i++) {
589 		unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
590 
591 		/* verify valid query_type, ie. is it actually a perfcntr? */
592 		if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
593 				(idx >= screen->num_perfcntr_queries)) {
594 			debug_printf("invalid batch query query_type: %u\n", query_types[i]);
595 			goto error;
596 		}
597 
598 		struct fd_batch_query_entry *entry = &data->query_entries[i];
599 		struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
600 
601 		entry->gid = pq->group_id;
602 
603 		/* the perfcntr_queries[] table flattens all the countables
604 		 * for each group in series, ie:
605 		 *
606 		 *   (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
607 		 *
608 		 * So to find the countable index just step back through the
609 		 * table to find the first entry with the same group-id.
610 		 */
611 		while (pq > screen->perfcntr_queries) {
612 			pq--;
613 			if (pq->group_id == entry->gid)
614 				entry->cid++;
615 		}
616 
617 		if (counters_per_group[entry->gid] >=
618 				screen->perfcntr_groups[entry->gid].num_counters) {
619 			debug_printf("too many counters for group %u\n", entry->gid);
620 			goto error;
621 		}
622 
623 		counters_per_group[entry->gid]++;
624 	}
625 
626 	q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
627 	aq = fd_acc_query(q);
628 
629 	/* sample buffer size is based on # of queries: */
630 	aq->size = num_queries * sizeof(struct fd6_query_sample);
631 	aq->query_data = data;
632 
633 	return (struct pipe_query *)q;
634 
635 error:
636 	free(data);
637 	return NULL;
638 }
639 
/* Hook up the a6xx query implementation: context vfuncs plus one
 * fd_acc_sample_provider per supported PIPE_QUERY_* type.
 */
void
fd6_query_context_init(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	ctx->create_query = fd_acc_create_query;
	ctx->query_set_stage = fd_acc_query_set_stage;

	/* used by fd_log() timestamp annotations: */
	ctx->record_timestamp = record_timestamp;
	ctx->ts_to_ns = ticks_to_ns;

	pctx->create_batch_query = fd6_create_batch_query;

	fd_acc_query_register_provider(pctx, &occlusion_counter);
	fd_acc_query_register_provider(pctx, &occlusion_predicate);
	fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);

	fd_acc_query_register_provider(pctx, &time_elapsed);
	fd_acc_query_register_provider(pctx, &timestamp);

	fd_acc_query_register_provider(pctx, &primitives_generated);
	fd_acc_query_register_provider(pctx, &primitives_emitted);
}
663