/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#include "si_pipe.h"
#include "si_query.h"
#include "sid.h"
#include "util/u_memory.h"
#include "util/u_suballoc.h"

#include <stddef.h>

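/* Atom emit callback: advance the write head of the most recent query buffer
 * by one result slot. Each emission consumes one gfx11_sh_query_buffer_mem.
 */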
static void emit_shader_query(struct si_context *sctx, unsigned index)
{
   assert(!list_is_empty(&sctx->shader_query_buffers));

   struct gfx11_sh_query_buffer *qbuf =
      list_last_entry(&sctx->shader_query_buffers, struct gfx11_sh_query_buffer, list);
   qbuf->head += sizeof(struct gfx11_sh_query_buffer_mem);
}

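/* Drop one query reference from each buffer in the chain [first, last] and
 * free buffers whose refcount reaches zero, except for the most recent and
 * the oldest buffer, which stay on the list (see the comments below).
 */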
static void gfx11_release_query_buffers(struct si_context *sctx,
                                        struct gfx11_sh_query_buffer *first,
                                        struct gfx11_sh_query_buffer *last)
{
   while (first) {
      struct gfx11_sh_query_buffer *qbuf = first;
      if (first != last)
         first = list_entry(qbuf->list.next, struct gfx11_sh_query_buffer, list);
      else
         first = NULL;

      qbuf->refcount--;
      if (qbuf->refcount)
         continue;

      if (qbuf->list.next == &sctx->shader_query_buffers)
         continue; /* keep the most recent buffer; it may not be full yet */
      if (qbuf->list.prev == &sctx->shader_query_buffers)
         continue; /* keep the oldest buffer for recycling */

      list_del(&qbuf->list);
      si_resource_reference(&qbuf->buf, NULL);
      FREE(qbuf);
   }
}

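/* Make sure a buffer with room for one more result slot is bound as the
 * internal GS query shader buffer: reuse the newest buffer if it has space,
 * recycle the oldest one if it is idle, or allocate a new one.
 */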
static bool gfx11_alloc_query_buffer(struct si_context *sctx)
{
   if (si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query))
      return true;

   struct gfx11_sh_query_buffer *qbuf = NULL;

   if (!list_is_empty(&sctx->shader_query_buffers)) {
      qbuf = list_last_entry(&sctx->shader_query_buffers, struct gfx11_sh_query_buffer, list);
      if (qbuf->head + sizeof(struct gfx11_sh_query_buffer_mem) <= qbuf->buf->b.b.width0)
         goto success;

      qbuf = list_first_entry(&sctx->shader_query_buffers, struct gfx11_sh_query_buffer, list);
      if (!qbuf->refcount &&
          !si_cs_is_buffer_referenced(sctx, qbuf->buf->buf, RADEON_USAGE_READWRITE) &&
          sctx->ws->buffer_wait(sctx->ws, qbuf->buf->buf, 0,
                                RADEON_USAGE_READWRITE | RADEON_USAGE_DISALLOW_SLOW_REPLY)) {
         /* Can immediately re-use the oldest buffer */
         list_del(&qbuf->list);
      } else {
         qbuf = NULL;
      }
   }

   if (!qbuf) {
      qbuf = CALLOC_STRUCT(gfx11_sh_query_buffer);
      if (unlikely(!qbuf))
         return false;

      struct si_screen *screen = sctx->screen;
      unsigned buf_size =
         MAX2(sizeof(struct gfx11_sh_query_buffer_mem), screen->info.min_alloc_size);
      qbuf->buf = si_resource(pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size));
      if (unlikely(!qbuf->buf)) {
         FREE(qbuf);
         return false;
      }
   }

   /* The buffer is currently unused by the GPU. Initialize it.
    *
    * We need to set the high bit of all the primitive counters for
    * compatibility with the SET_PREDICATION packet.
    */
   uint64_t *results = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL,
                                            PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);
   assert(results);
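
   /* Each gfx11_sh_query_buffer_mem spans 32 qwords: 16 per-stream counter
    * qwords (4 per stream) followed by the fence qword at index 16.
    */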
   for (unsigned i = 0, e = qbuf->buf->b.b.width0 / sizeof(struct gfx11_sh_query_buffer_mem); i < e;
        ++i) {
      for (unsigned j = 0; j < 16; ++j)
         results[32 * i + j] = (uint64_t)1 << 63;
      results[32 * i + 16] = 0;
   }

   list_addtail(&qbuf->list, &sctx->shader_query_buffers);
   qbuf->head = 0;
   qbuf->refcount = sctx->num_active_shader_queries;

success:;
   struct pipe_shader_buffer sbuf;
   sbuf.buffer = &qbuf->buf->b.b;
   sbuf.buffer_offset = qbuf->head;
   sbuf.buffer_size = sizeof(struct gfx11_sh_query_buffer_mem);
   si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, &sbuf);
   SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED, 1);

   si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_query);
   return true;
}

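/* Release the query's buffer references and free the query object itself. */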
static void gfx11_sh_query_destroy(struct si_context *sctx, struct si_query *rquery)
{
   struct gfx11_sh_query *query = (struct gfx11_sh_query *)rquery;
   gfx11_release_query_buffers(sctx, query->first, query->last);
   FREE(query);
}

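/* Start the query: remember the buffer and offset where its results begin
 * and take a reference on that buffer.
 */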
static bool gfx11_sh_query_begin(struct si_context *sctx, struct si_query *rquery)
{
   struct gfx11_sh_query *query = (struct gfx11_sh_query *)rquery;

   gfx11_release_query_buffers(sctx, query->first, query->last);
   query->first = query->last = NULL;

   if (unlikely(!gfx11_alloc_query_buffer(sctx)))
      return false;

   query->first = list_last_entry(&sctx->shader_query_buffers, struct gfx11_sh_query_buffer, list);
   query->first_begin = query->first->head;

   sctx->num_active_shader_queries++;
   query->first->refcount++;

   return true;
}

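/* Stop the query: record where its results end, signal the fence of the last
 * completed chunk, and unbind the query buffer if no queries remain active.
 */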
static bool gfx11_sh_query_end(struct si_context *sctx, struct si_query *rquery)
{
   struct gfx11_sh_query *query = (struct gfx11_sh_query *)rquery;

   if (unlikely(!query->first))
      return false; /* earlier out of memory error */

   query->last = list_last_entry(&sctx->shader_query_buffers, struct gfx11_sh_query_buffer, list);
   query->last_end = query->last->head;

   /* Signal the fence of the previous chunk */
   if (query->last_end != 0) {
      uint64_t fence_va = query->last->buf->gpu_address;
      fence_va += query->last_end - sizeof(struct gfx11_sh_query_buffer_mem);
      fence_va += offsetof(struct gfx11_sh_query_buffer_mem, fence);
      si_cp_release_mem(sctx, &sctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
                        EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, query->last->buf, fence_va,
                        0xffffffff, PIPE_QUERY_GPU_FINISHED);
   }

   sctx->num_active_shader_queries--;

   if (sctx->num_active_shader_queries <= 0 || !si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query)) {
      si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, NULL);
      SET_FIELD(sctx->current_gs_state, GS_STATE_STREAMOUT_QUERY_ENABLED, 0);

      /* If a query_begin is followed by a query_end without a draw
       * in-between, we need to clear the atom to ensure that the
       * next query_begin will re-initialize the shader buffer. */
      si_set_atom_dirty(sctx, &sctx->atoms.s.shader_query, false);
   }

   return true;
}

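/* Accumulate the counters of one result slot into the CPU-side result. */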
static void gfx11_sh_query_add_result(struct gfx11_sh_query *query,
                                      struct gfx11_sh_query_buffer_mem *qmem,
                                      union pipe_query_result *result)
{
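   /* The counters were initialized with bit 63 set for SET_PREDICATION
    * compatibility (see gfx11_alloc_query_buffer); mask it off before
    * accumulating.
    */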
   static const uint64_t mask = ((uint64_t)1 << 63) - 1;

   switch (query->b.type) {
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      result->u64 += qmem->stream[query->stream].emitted_primitives & mask;
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      result->u64 += qmem->stream[query->stream].generated_primitives & mask;
      break;
   case PIPE_QUERY_SO_STATISTICS:
      result->so_statistics.num_primitives_written +=
         qmem->stream[query->stream].emitted_primitives & mask;
      result->so_statistics.primitives_storage_needed +=
         qmem->stream[query->stream].generated_primitives & mask;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result->b |= qmem->stream[query->stream].emitted_primitives !=
                   qmem->stream[query->stream].generated_primitives;
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream) {
         result->b |= qmem->stream[stream].emitted_primitives !=
                      qmem->stream[stream].generated_primitives;
      }
      break;
   default:
      assert(0);
   }
}

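/* Read results back on the CPU: map each buffer in the query's chain and
 * accumulate all result slots between first_begin and last_end.
 */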
static bool gfx11_sh_query_get_result(struct si_context *sctx, struct si_query *rquery, bool wait,
                                      union pipe_query_result *result)
{
   struct gfx11_sh_query *query = (struct gfx11_sh_query *)rquery;

   util_query_clear_result(result, query->b.type);

   if (unlikely(!query->first))
      return false; /* earlier out of memory error */
   assert(query->last);

   for (struct gfx11_sh_query_buffer *qbuf = query->last;;
        qbuf = list_entry(qbuf->list.prev, struct gfx11_sh_query_buffer, list)) {
      unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK);
      void *map;

      if (rquery->b.flushed)
         map = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, usage);
      else
         map = si_buffer_map(sctx, qbuf->buf, usage);

      if (!map)
         return false;

      unsigned results_begin = 0;
      unsigned results_end = qbuf->head;
      if (qbuf == query->first)
         results_begin = query->first_begin;
      if (qbuf == query->last)
         results_end = query->last_end;

      while (results_begin != results_end) {
         struct gfx11_sh_query_buffer_mem *qmem = map + results_begin;
         results_begin += sizeof(*qmem);

         gfx11_sh_query_add_result(query, qmem, result);
      }

      if (qbuf == query->first)
         break;
   }

   return true;
}

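/* Write the query result into a buffer on the GPU, using a compute shader
 * that reduces all result slots in the chain. When the query spans multiple
 * buffers, intermediate sums are carried through a small scratch buffer.
 */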
static void gfx11_sh_query_get_result_resource(struct si_context *sctx, struct si_query *rquery,
                                               enum pipe_query_flags flags,
                                               enum pipe_query_value_type result_type,
                                               int index, struct pipe_resource *resource,
                                               unsigned offset)
{
   struct gfx11_sh_query *query = (struct gfx11_sh_query *)rquery;
   struct si_qbo_state saved_state = {};
   struct pipe_resource *tmp_buffer = NULL;
   unsigned tmp_buffer_offset = 0;

   if (!sctx->sh_query_result_shader) {
      sctx->sh_query_result_shader = gfx11_create_sh_query_result_cs(sctx);
      if (!sctx->sh_query_result_shader)
         return;
   }

   if (query->first != query->last) {
      u_suballocator_alloc(&sctx->allocator_zeroed_memory, 16, 16, &tmp_buffer_offset, &tmp_buffer);
      if (!tmp_buffer)
         return;
   }

   si_save_qbo_state(sctx, &saved_state);

   /* Pre-fill the constants configuring the shader behavior. */
   struct {
      uint32_t config;
      uint32_t offset;
      uint32_t chain;
      uint32_t result_count;
   } consts;
   struct pipe_constant_buffer constant_buffer = {};

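   /* As set up below: config selects the operation of the result shader
    * (0 = read the counter at "offset", 1 = availability only, 2 = overflow
    * predicate for one stream, 3 = overflow predicate for all streams;
    * bit 3 requests a 64-bit result), and chain bits 0/1 tell the shader
    * whether a previous/next buffer exists in the chain.
    */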
   if (index >= 0) {
      switch (query->b.type) {
      case PIPE_QUERY_PRIMITIVES_GENERATED:
         consts.offset = 4 * sizeof(uint64_t) * query->stream + 2 * sizeof(uint64_t);
         consts.config = 0;
         break;
      case PIPE_QUERY_PRIMITIVES_EMITTED:
         consts.offset = 4 * sizeof(uint64_t) * query->stream + 3 * sizeof(uint64_t);
         consts.config = 0;
         break;
      case PIPE_QUERY_SO_STATISTICS:
         consts.offset = sizeof(uint32_t) * (4 * index + query->stream);
         consts.config = 0;
         break;
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         consts.offset = 4 * sizeof(uint64_t) * query->stream;
         consts.config = 2;
         break;
      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
         consts.offset = 0;
         consts.config = 3;
         break;
      default:
         unreachable("bad query type");
      }
   } else {
      /* Check result availability. */
      consts.offset = 0;
      consts.config = 1;
   }

   bool is_result_64bit = result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64;
   if (is_result_64bit)
      consts.config |= 8;

   constant_buffer.buffer_size = sizeof(consts);
   constant_buffer.user_buffer = &consts;

   /* Pre-fill the SSBOs and grid. */
   struct pipe_shader_buffer ssbo[3];
   struct pipe_grid_info grid = {};

   ssbo[1].buffer = tmp_buffer;
   ssbo[1].buffer_offset = tmp_buffer_offset;
   ssbo[1].buffer_size = 16;

   ssbo[2] = ssbo[1];

   grid.block[0] = 1;
   grid.block[1] = 1;
   grid.block[2] = 1;
   grid.grid[0] = 1;
   grid.grid[1] = 1;
   grid.grid[2] = 1;

   /* TODO: Range-invalidate GL2 */
   if (sctx->screen->info.cp_sdma_ge_use_system_memory_scope) {
      sctx->barrier_flags |= SI_BARRIER_INV_L2;
      si_mark_atom_dirty(sctx, &sctx->atoms.s.barrier);
   }

   struct gfx11_sh_query_buffer *qbuf = query->first;
   for (;;) {
      unsigned begin = qbuf == query->first ? query->first_begin : 0;
      unsigned end = qbuf == query->last ? query->last_end : qbuf->buf->b.b.width0;
      if (!end)
         break; /* only the last chunk can be empty (width0 is never 0);
                 * "continue" would spin forever here because qbuf is only
                 * advanced at the bottom of the loop */

      ssbo[0].buffer = &qbuf->buf->b.b;
      ssbo[0].buffer_offset = begin;
      ssbo[0].buffer_size = end - begin;

      consts.result_count = (end - begin) / sizeof(struct gfx11_sh_query_buffer_mem);
      consts.chain = 0;
      if (qbuf != query->first)
         consts.chain |= 1;
      if (qbuf != query->last)
         consts.chain |= 2;

      if (qbuf == query->last) {
         ssbo[2].buffer = resource;
         ssbo[2].buffer_offset = offset;
         ssbo[2].buffer_size = is_result_64bit ? 8 : 4;
      }

      sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, false, &constant_buffer);

      if (flags & PIPE_QUERY_WAIT) {
         uint64_t va;

         /* Wait for result availability. Wait only for readiness
          * of the last entry, since the fence writes should be
          * serialized in the CP.
          */
         va = qbuf->buf->gpu_address;
         va += end - sizeof(struct gfx11_sh_query_buffer_mem);
         va += offsetof(struct gfx11_sh_query_buffer_mem, fence);

         si_cp_wait_mem(sctx, &sctx->gfx_cs, va, 0x00000001, 0x00000001, 0);
      }

      /* ssbo[2] is either tmp_buffer or resource */
      assert(ssbo[2].buffer);

      unsigned writable_bitmask = (1 << 2) | (ssbo[1].buffer ? 1 << 1 : 0);

      si_barrier_before_internal_op(sctx, 0, 3, ssbo, writable_bitmask, 0, NULL);
      si_launch_grid_internal_ssbos(sctx, &grid, sctx->sh_query_result_shader, 3, ssbo,
                                    writable_bitmask, false);
      si_barrier_after_internal_op(sctx, 0, 3, ssbo, writable_bitmask, 0, NULL);

      if (qbuf == query->last)
         break;
      qbuf = list_entry(qbuf->list.next, struct gfx11_sh_query_buffer, list);
   }

   si_restore_qbo_state(sctx, &saved_state);
   pipe_resource_reference(&tmp_buffer, NULL);
}

static const struct si_query_ops gfx11_sh_query_ops = {
   .destroy = gfx11_sh_query_destroy,
   .begin = gfx11_sh_query_begin,
   .end = gfx11_sh_query_end,
   .get_result = gfx11_sh_query_get_result,
   .get_result_resource = gfx11_sh_query_get_result_resource,
};

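/* Create a streamout-related query object; "index" selects the GS stream. */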
struct pipe_query *gfx11_sh_query_create(struct si_screen *screen, enum pipe_query_type query_type,
                                         unsigned index)
{
   struct gfx11_sh_query *query = CALLOC_STRUCT(gfx11_sh_query);
   if (unlikely(!query))
      return NULL;

   query->b.ops = &gfx11_sh_query_ops;
   query->b.type = query_type;
   query->stream = index;

   return (struct pipe_query *)query;
}

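/* One-time context init: set up the buffer list and the atom emit callback. */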
void si_gfx11_init_query(struct si_context *sctx)
{
   list_inithead(&sctx->shader_query_buffers);
   sctx->atoms.s.shader_query.emit = emit_shader_query;
}

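/* Context teardown: free all query buffers. All queries must have been
 * destroyed by this point, so every refcount is zero.
 */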
void si_gfx11_destroy_query(struct si_context *sctx)
{
   if (!sctx->shader_query_buffers.next)
      return;

   while (!list_is_empty(&sctx->shader_query_buffers)) {
      struct gfx11_sh_query_buffer *qbuf =
         list_first_entry(&sctx->shader_query_buffers, struct gfx11_sh_query_buffer, list);
      list_del(&qbuf->list);

      assert(!qbuf->refcount);
      si_resource_reference(&qbuf->buf, NULL);
      FREE(qbuf);
   }
}
458