/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"

#include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_resource.h"
#include "freedreno_util.h"

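/* A sample period marks the span between a resume (start sample) and the
 * following pause (end sample) of a query within a batch:
 */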
struct fd_hw_sample_period {
   struct fd_hw_sample *start, *end;
   struct list_head list;
};

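/* Get a sample for the current point in the cmdstream.  A sample of each
 * type is cached per-batch, so multiple queries of the same type sample
 * the same point in the cmdstream; taking the first sample for a provider
 * also marks the batch as needing flush:
 */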
static struct fd_hw_sample *
get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring,
           unsigned query_type) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd_hw_sample *samp = NULL;
   int idx = pidx(query_type);

   assume(idx >= 0); /* query never would have been created otherwise */

   if (!batch->sample_cache[idx]) {
      struct fd_hw_sample *new_samp =
         ctx->hw_sample_providers[idx]->get_sample(batch, ring);
      fd_hw_sample_reference(ctx, &batch->sample_cache[idx], new_samp);
      util_dynarray_append(&batch->samples, struct fd_hw_sample *, new_samp);
      fd_batch_needs_flush(batch);
   }

   fd_hw_sample_reference(ctx, &samp, batch->sample_cache[idx]);

   return samp;
}

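/* Drop the cached sample references, so that subsequent get_sample() calls
 * take fresh samples:
 */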
static void
clear_sample_cache(struct fd_batch *batch)
{
   int i;

   for (i = 0; i < ARRAY_SIZE(batch->sample_cache); i++)
      fd_hw_sample_reference(batch->ctx, &batch->sample_cache[i], NULL);
}

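/* Does the query currently have an open sample period in this batch? */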
static bool
query_active_in_batch(struct fd_batch *batch, struct fd_hw_query *hq)
{
   int idx = pidx(hq->provider->query_type);
   return batch->query_providers_active & (1 << idx);
}

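/* Open a new sample period for the query, taking the start sample at the
 * current point in the cmdstream:
 */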
static void
resume_query(struct fd_batch *batch, struct fd_hw_query *hq,
             struct fd_ringbuffer *ring) assert_dt
{
   int idx = pidx(hq->provider->query_type);
   DBG("%p", hq);
   assert(idx >= 0); /* query never would have been created otherwise */
   assert(!hq->period);
   batch->query_providers_used |= (1 << idx);
   batch->query_providers_active |= (1 << idx);
   hq->period = slab_alloc_st(&batch->ctx->sample_period_pool);
   list_inithead(&hq->period->list);
   hq->period->start = get_sample(batch, ring, hq->base.type);
   /* NOTE: slab_alloc_st() does not zero out the buffer: */
   hq->period->end = NULL;
}

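/* Close the open sample period with an end sample, and move the completed
 * period onto the query's list of periods:
 */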
static void
pause_query(struct fd_batch *batch, struct fd_hw_query *hq,
            struct fd_ringbuffer *ring) assert_dt
{
   ASSERTED int idx = pidx(hq->provider->query_type);
   DBG("%p", hq);
   assert(idx >= 0); /* query never would have been created otherwise */
   assert(hq->period && !hq->period->end);
   assert(query_active_in_batch(batch, hq));
   batch->query_providers_active &= ~(1 << idx);
   hq->period->end = get_sample(batch, ring, hq->base.type);
   list_addtail(&hq->period->list, &hq->periods);
   hq->period = NULL;
}

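/* Release all completed sample periods (and their sample references): */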
static void
destroy_periods(struct fd_context *ctx, struct fd_hw_query *hq)
{
   struct fd_hw_sample_period *period, *s;
   LIST_FOR_EACH_ENTRY_SAFE (period, s, &hq->periods, list) {
      fd_hw_sample_reference(ctx, &period->start, NULL);
      fd_hw_sample_reference(ctx, &period->end, NULL);
      list_del(&period->list);
      slab_free_st(&ctx->sample_period_pool, period);
   }
}

static void
fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
{
   struct fd_hw_query *hq = fd_hw_query(q);

   DBG("%p", q);

   destroy_periods(ctx, hq);
   list_del(&hq->list);

   free(hq);
}

static void
fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
   struct fd_batch *batch = fd_context_batch(ctx);
   struct fd_hw_query *hq = fd_hw_query(q);

   DBG("%p", q);

   /* begin_query() should clear previous results: */
   destroy_periods(ctx, hq);

   if (batch && (ctx->active_queries || hq->provider->always))
      resume_query(batch, hq, batch->draw);

   /* add to active list: */
   assert(list_is_empty(&hq->list));
   list_addtail(&hq->list, &ctx->hw_active_queries);

   fd_batch_reference(&batch, NULL);
}

static void
fd_hw_end_query(struct fd_context *ctx, struct fd_query *q) assert_dt
{
   struct fd_batch *batch = fd_context_batch(ctx);
   struct fd_hw_query *hq = fd_hw_query(q);

   DBG("%p", q);

   if (batch && (ctx->active_queries || hq->provider->always))
      pause_query(batch, hq, batch->draw);

   /* remove from active list: */
   list_delinit(&hq->list);

   fd_batch_reference(&batch, NULL);
}

/* helper to get ptr to the specified sample's result for tile n: */
static void *
sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
{
   return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
}

static bool
fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q, bool wait,
                       union pipe_query_result *result)
{
   struct fd_hw_query *hq = fd_hw_query(q);
   const struct fd_hw_sample_provider *p = hq->provider;
   struct fd_hw_sample_period *period, *tmp;

   DBG("%p: wait=%d", q, wait);

   if (list_is_empty(&hq->periods))
      return true;

   assert(list_is_empty(&hq->list));
   assert(!hq->period);

   /* sum the result across all sample periods.  Start with the last period
    * so that no-wait will bail quickly.
    */
   LIST_FOR_EACH_ENTRY_SAFE_REV (period, tmp, &hq->periods, list) {
      struct fd_hw_sample *start = period->start;
      ASSERTED struct fd_hw_sample *end = period->end;
      unsigned i;

      /* start and end samples should be from same batch: */
      assert(start->prsc == end->prsc);
      assert(start->num_tiles == end->num_tiles);

      struct fd_resource *rsc = fd_resource(start->prsc);

      /* ARB_occlusion_query says:
       *
       *   "Querying the state for a given occlusion query forces that
       *    occlusion query to complete within a finite amount of time."
       *
       * So, regardless of whether we are supposed to wait or not, we do need
       * to flush now.
       */
      if (fd_get_query_result_in_driver_thread(q)) {
         tc_assert_driver_thread(ctx->tc);
         fd_context_access_begin(ctx);
         fd_bc_flush_writer(ctx, rsc);
         fd_context_access_end(ctx);
      }

      /* some piglit tests at least do query with no draws, I guess: */
      if (!rsc->bo)
         continue;

      if (!wait) {
         int ret = fd_resource_wait(
            ctx, rsc, FD_BO_PREP_READ | FD_BO_PREP_NOSYNC | FD_BO_PREP_FLUSH);
         if (ret)
            return false;
      } else {
         fd_resource_wait(ctx, rsc, FD_BO_PREP_READ);
      }

      void *ptr = fd_bo_map(rsc->bo);

      for (i = 0; i < start->num_tiles; i++) {
         p->accumulate_result(ctx, sampptr(period->start, i, ptr),
                              sampptr(period->end, i, ptr), result);
      }
   }

   return true;
}

static const struct fd_query_funcs hw_query_funcs = {
   .destroy_query = fd_hw_destroy_query,
   .begin_query = fd_hw_begin_query,
   .end_query = fd_hw_end_query,
   .get_query_result = fd_hw_get_query_result,
};

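/* Create a hw query; returns NULL if no sample provider is registered for
 * the requested query type:
 */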
struct fd_query *
fd_hw_create_query(struct fd_context *ctx, unsigned query_type, unsigned index)
{
   struct fd_hw_query *hq;
   struct fd_query *q;
   int idx = pidx(query_type);

   if ((idx < 0) || !ctx->hw_sample_providers[idx])
      return NULL;

   hq = CALLOC_STRUCT(fd_hw_query);
   if (!hq)
      return NULL;

   DBG("%p: query_type=%u", hq, query_type);

   hq->provider = ctx->hw_sample_providers[idx];

   list_inithead(&hq->periods);
   list_inithead(&hq->list);

   q = &hq->base;
   q->funcs = &hw_query_funcs;
   q->type = query_type;
   q->index = index;

   return q;
}

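/* Allocate a sample, reserving the next 'size' bytes of per-tile space in
 * the batch's query buffer.  num_tiles/tile_stride are filled in later, in
 * fd_hw_query_prepare(), once the tile count is known:
 */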
struct fd_hw_sample *
fd_hw_sample_init(struct fd_batch *batch, uint32_t size)
{
   struct fd_hw_sample *samp = slab_alloc_st(&batch->ctx->sample_pool);
   pipe_reference_init(&samp->reference, 1);
   samp->size = size;
   assert(util_is_power_of_two_or_zero(size));
   batch->next_sample_offset = align(batch->next_sample_offset, size);
   samp->offset = batch->next_sample_offset;
   /* NOTE: slab_alloc_st() does not zero out the buffer: */
   samp->prsc = NULL;
   samp->num_tiles = 0;
   samp->tile_stride = 0;
   batch->next_sample_offset += size;

   pipe_resource_reference(&samp->prsc, batch->query_buf);

   return samp;
}

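/* Drop the sample's resource reference and return it to the pool; called
 * once the last reference to the sample is released:
 */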
void
__fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
{
   pipe_resource_reference(&samp->prsc, NULL);
   slab_free_st(&ctx->sample_pool, samp);
}

/* called from gmem code once total storage requirements are known (ie.
 * number of samples times number of tiles)
 */
void
fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles)
{
   uint32_t tile_stride = batch->next_sample_offset;

   if (tile_stride > 0)
      fd_resource_resize(batch->query_buf, tile_stride * num_tiles);

   batch->query_tile_stride = tile_stride;

   while (batch->samples.size > 0) {
      struct fd_hw_sample *samp =
         util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
      samp->num_tiles = num_tiles;
      samp->tile_stride = tile_stride;
      fd_hw_sample_reference(batch->ctx, &samp, NULL);
   }

   /* reset things for next batch: */
   batch->next_sample_offset = 0;
}

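/* Called per tile to program the hw query buffer base address, so that
 * samples for tile n land at the right offset:
 */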
void
fd_hw_query_prepare_tile(struct fd_batch *batch, uint32_t n,
                         struct fd_ringbuffer *ring)
{
   uint32_t tile_stride = batch->query_tile_stride;
   uint32_t offset = tile_stride * n;

   /* bail if no queries: */
   if (tile_stride == 0)
      return;

   fd_wfi(batch, ring);
   OUT_PKT0(ring, HW_QUERY_BASE_REG, 1);
   OUT_RELOC(ring, fd_resource(batch->query_buf)->bo, offset, 0, 0);
}

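/* Reconcile the context's active queries with this batch: resume queries
 * that have become active and pause ones that are no longer active (with
 * disable_all, pause everything), then invalidate the sample cache:
 */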
void
fd_hw_query_update_batch(struct fd_batch *batch, bool disable_all)
{
   struct fd_context *ctx = batch->ctx;

   if (disable_all || (ctx->dirty & FD_DIRTY_QUERY)) {
      struct fd_hw_query *hq;
      LIST_FOR_EACH_ENTRY (hq, &batch->ctx->hw_active_queries, list) {
         bool was_active = query_active_in_batch(batch, hq);
         bool now_active =
            !disable_all && (ctx->active_queries || hq->provider->always);

         if (now_active && !was_active)
            resume_query(batch, hq, batch->draw);
         else if (was_active && !now_active)
            pause_query(batch, hq, batch->draw);
      }
   }
   clear_sample_cache(batch);
}

/* call the provider->enable() for all the hw queries that were active
 * in the current batch.  This sets up perfctr selector regs statically
 * for the duration of the batch.
 */
void
fd_hw_query_enable(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct fd_context *ctx = batch->ctx;
   for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
      if (batch->query_providers_used & (1 << idx)) {
         assert(ctx->hw_sample_providers[idx]);
         if (ctx->hw_sample_providers[idx]->enable)
            ctx->hw_sample_providers[idx]->enable(ctx, ring);
      }
   }
}

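/* Register the sample provider for its query type (at most one provider
 * per query type):
 */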
void
fd_hw_query_register_provider(struct pipe_context *pctx,
                              const struct fd_hw_sample_provider *provider)
{
   struct fd_context *ctx = fd_context(pctx);
   int idx = pidx(provider->query_type);

   assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
   assert(!ctx->hw_sample_providers[idx]);

   ctx->hw_sample_providers[idx] = provider;
}

void
fd_hw_query_init(struct pipe_context *pctx)
{
   struct fd_context *ctx = fd_context(pctx);

   slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample), 16);
   slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
               16);
}

void
fd_hw_query_fini(struct pipe_context *pctx)
{
   struct fd_context *ctx = fd_context(pctx);

   slab_destroy(&ctx->sample_pool);
   slab_destroy(&ctx->sample_period_pool);
}