1 /*
2 * Copyright 2011 Christoph Bumiller
3 * Copyright 2015 Samuel Pitoiset
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
25
26 #include "nvc0/nvc0_context.h"
27 #include "nvc0/nvc0_query_hw.h"
28 #include "nvc0/nvc0_query_hw_metric.h"
29 #include "nvc0/nvc0_query_hw_sm.h"
30
31 #define NVC0_HW_QUERY_ALLOC_SPACE 256
32
33 bool
nvc0_hw_query_allocate(struct nvc0_context * nvc0,struct nvc0_query * q,int size)34 nvc0_hw_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q,
35 int size)
36 {
37 struct nvc0_hw_query *hq = nvc0_hw_query(q);
38 struct nvc0_screen *screen = nvc0->screen;
39 int ret;
40
41 if (hq->bo) {
42 nouveau_bo_ref(NULL, &hq->bo);
43 if (hq->mm) {
44 if (hq->state == NVC0_HW_QUERY_STATE_READY)
45 nouveau_mm_free(hq->mm);
46 else
47 nouveau_fence_work(screen->base.fence.current,
48 nouveau_mm_free_work, hq->mm);
49 }
50 }
51 if (size) {
52 hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &hq->bo,
53 &hq->base_offset);
54 if (!hq->bo)
55 return false;
56 hq->offset = hq->base_offset;
57
58 ret = nouveau_bo_map(hq->bo, 0, nvc0->base.client);
59 if (ret) {
60 nvc0_hw_query_allocate(nvc0, q, 0);
61 return false;
62 }
63 hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset);
64 }
65 return true;
66 }
67
68 static void
nvc0_hw_query_get(struct nouveau_pushbuf * push,struct nvc0_query * q,unsigned offset,uint32_t get)69 nvc0_hw_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q,
70 unsigned offset, uint32_t get)
71 {
72 struct nvc0_hw_query *hq = nvc0_hw_query(q);
73
74 offset += hq->offset;
75
76 PUSH_SPACE(push, 5);
77 PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
78 BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
79 PUSH_DATAh(push, hq->bo->offset + offset);
80 PUSH_DATA (push, hq->bo->offset + offset);
81 PUSH_DATA (push, hq->sequence);
82 PUSH_DATA (push, get);
83 }
84
85 static void
nvc0_hw_query_rotate(struct nvc0_context * nvc0,struct nvc0_query * q)86 nvc0_hw_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
87 {
88 struct nvc0_hw_query *hq = nvc0_hw_query(q);
89
90 hq->offset += hq->rotate;
91 hq->data += hq->rotate / sizeof(*hq->data);
92 if (hq->offset - hq->base_offset == NVC0_HW_QUERY_ALLOC_SPACE)
93 nvc0_hw_query_allocate(nvc0, q, NVC0_HW_QUERY_ALLOC_SPACE);
94 }
95
96 static inline void
nvc0_hw_query_update(struct nouveau_client * cli,struct nvc0_query * q)97 nvc0_hw_query_update(struct nouveau_client *cli, struct nvc0_query *q)
98 {
99 struct nvc0_hw_query *hq = nvc0_hw_query(q);
100
101 if (hq->is64bit) {
102 if (nouveau_fence_signalled(hq->fence))
103 hq->state = NVC0_HW_QUERY_STATE_READY;
104 } else {
105 if (hq->data[0] == hq->sequence)
106 hq->state = NVC0_HW_QUERY_STATE_READY;
107 }
108 }
109
110 static void
nvc0_hw_destroy_query(struct nvc0_context * nvc0,struct nvc0_query * q)111 nvc0_hw_destroy_query(struct nvc0_context *nvc0, struct nvc0_query *q)
112 {
113 struct nvc0_hw_query *hq = nvc0_hw_query(q);
114
115 if (hq->funcs && hq->funcs->destroy_query) {
116 hq->funcs->destroy_query(nvc0, hq);
117 return;
118 }
119
120 nvc0_hw_query_allocate(nvc0, q, 0);
121 nouveau_fence_ref(NULL, &hq->fence);
122 FREE(hq);
123 }
124
125 static void
nvc0_hw_query_write_compute_invocations(struct nvc0_context * nvc0,struct nvc0_hw_query * hq,uint32_t offset)126 nvc0_hw_query_write_compute_invocations(struct nvc0_context *nvc0,
127 struct nvc0_hw_query *hq,
128 uint32_t offset)
129 {
130 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
131
132 nouveau_pushbuf_space(push, 16, 0, 8);
133 PUSH_REFN(push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
134 BEGIN_1IC0(push, NVC0_3D(MACRO_COMPUTE_COUNTER_TO_QUERY), 4);
135 PUSH_DATA (push, nvc0->compute_invocations);
136 PUSH_DATAh(push, nvc0->compute_invocations);
137 PUSH_DATAh(push, hq->bo->offset + hq->offset + offset);
138 PUSH_DATA (push, hq->bo->offset + hq->offset + offset);
139 }
140
141 static bool
nvc0_hw_begin_query(struct nvc0_context * nvc0,struct nvc0_query * q)142 nvc0_hw_begin_query(struct nvc0_context *nvc0, struct nvc0_query *q)
143 {
144 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
145 struct nvc0_hw_query *hq = nvc0_hw_query(q);
146 bool ret = true;
147
148 if (hq->funcs && hq->funcs->begin_query)
149 return hq->funcs->begin_query(nvc0, hq);
150
151 /* For occlusion queries we have to change the storage, because a previous
152 * query might set the initial render condition to false even *after* we re-
153 * initialized it to true.
154 */
155 if (hq->rotate) {
156 nvc0_hw_query_rotate(nvc0, q);
157
158 /* XXX: can we do this with the GPU, and sync with respect to a previous
159 * query ?
160 */
161 hq->data[0] = hq->sequence; /* initialize sequence */
162 hq->data[1] = 1; /* initial render condition = true */
163 hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */
164 hq->data[5] = 0;
165 }
166 hq->sequence++;
167
168 switch (q->type) {
169 case PIPE_QUERY_OCCLUSION_COUNTER:
170 case PIPE_QUERY_OCCLUSION_PREDICATE:
171 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
172 if (nvc0->screen->num_occlusion_queries_active++) {
173 nvc0_hw_query_get(push, q, 0x10, 0x0100f002);
174 } else {
175 PUSH_SPACE(push, 3);
176 BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1);
177 PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT);
178 IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
179 /* Given that the counter is reset, the contents at 0x10 are
180 * equivalent to doing the query -- we would get hq->sequence as the
181 * payload and 0 as the reported value. This is already set up above
182 * as in the hq->rotate case.
183 */
184 }
185 break;
186 case PIPE_QUERY_PRIMITIVES_GENERATED:
187 nvc0_hw_query_get(push, q, 0x10, 0x09005002 | (q->index << 5));
188 break;
189 case PIPE_QUERY_PRIMITIVES_EMITTED:
190 nvc0_hw_query_get(push, q, 0x10, 0x05805002 | (q->index << 5));
191 break;
192 case PIPE_QUERY_SO_STATISTICS:
193 nvc0_hw_query_get(push, q, 0x20, 0x05805002 | (q->index << 5));
194 nvc0_hw_query_get(push, q, 0x30, 0x06805002 | (q->index << 5));
195 break;
196 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
197 nvc0_hw_query_get(push, q, 0x10, 0x03005002 | (q->index << 5));
198 break;
199 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
200 /* XXX: This get actually writes the number of overflowed streams */
201 nvc0_hw_query_get(push, q, 0x10, 0x0f005002);
202 break;
203 case PIPE_QUERY_TIME_ELAPSED:
204 nvc0_hw_query_get(push, q, 0x10, 0x00005002);
205 break;
206 case PIPE_QUERY_PIPELINE_STATISTICS:
207 nvc0_hw_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
208 nvc0_hw_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
209 nvc0_hw_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
210 nvc0_hw_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
211 nvc0_hw_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
212 nvc0_hw_query_get(push, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
213 nvc0_hw_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
214 nvc0_hw_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
215 nvc0_hw_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
216 nvc0_hw_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
217 nvc0_hw_query_write_compute_invocations(nvc0, hq, 0xc0 + 0xa0);
218 break;
219 default:
220 break;
221 }
222 hq->state = NVC0_HW_QUERY_STATE_ACTIVE;
223 return ret;
224 }
225
226 static void
nvc0_hw_end_query(struct nvc0_context * nvc0,struct nvc0_query * q)227 nvc0_hw_end_query(struct nvc0_context *nvc0, struct nvc0_query *q)
228 {
229 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
230 struct nvc0_hw_query *hq = nvc0_hw_query(q);
231
232 if (hq->funcs && hq->funcs->end_query) {
233 hq->funcs->end_query(nvc0, hq);
234 return;
235 }
236
237 if (hq->state != NVC0_HW_QUERY_STATE_ACTIVE) {
238 /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
239 if (hq->rotate)
240 nvc0_hw_query_rotate(nvc0, q);
241 hq->sequence++;
242 }
243 hq->state = NVC0_HW_QUERY_STATE_ENDED;
244
245 switch (q->type) {
246 case PIPE_QUERY_OCCLUSION_COUNTER:
247 case PIPE_QUERY_OCCLUSION_PREDICATE:
248 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
249 nvc0_hw_query_get(push, q, 0, 0x0100f002);
250 if (--nvc0->screen->num_occlusion_queries_active == 0) {
251 PUSH_SPACE(push, 1);
252 IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
253 }
254 break;
255 case PIPE_QUERY_PRIMITIVES_GENERATED:
256 nvc0_hw_query_get(push, q, 0, 0x09005002 | (q->index << 5));
257 break;
258 case PIPE_QUERY_PRIMITIVES_EMITTED:
259 nvc0_hw_query_get(push, q, 0, 0x05805002 | (q->index << 5));
260 break;
261 case PIPE_QUERY_SO_STATISTICS:
262 nvc0_hw_query_get(push, q, 0x00, 0x05805002 | (q->index << 5));
263 nvc0_hw_query_get(push, q, 0x10, 0x06805002 | (q->index << 5));
264 break;
265 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
266 nvc0_hw_query_get(push, q, 0x00, 0x03005002 | (q->index << 5));
267 break;
268 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
269 /* XXX: This get actually writes the number of overflowed streams */
270 nvc0_hw_query_get(push, q, 0x00, 0x0f005002);
271 break;
272 case PIPE_QUERY_TIMESTAMP:
273 case PIPE_QUERY_TIME_ELAPSED:
274 nvc0_hw_query_get(push, q, 0, 0x00005002);
275 break;
276 case PIPE_QUERY_GPU_FINISHED:
277 nvc0_hw_query_get(push, q, 0, 0x1000f010);
278 break;
279 case PIPE_QUERY_PIPELINE_STATISTICS:
280 nvc0_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
281 nvc0_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
282 nvc0_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
283 nvc0_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
284 nvc0_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
285 nvc0_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
286 nvc0_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
287 nvc0_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
288 nvc0_hw_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
289 nvc0_hw_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
290 nvc0_hw_query_write_compute_invocations(nvc0, hq, 0xa0);
291 break;
292 case PIPE_QUERY_TIMESTAMP_DISJOINT:
293 /* This query is not issued on GPU because disjoint is forced to false */
294 hq->state = NVC0_HW_QUERY_STATE_READY;
295 break;
296 case NVC0_HW_QUERY_TFB_BUFFER_OFFSET:
297 /* indexed by TFB buffer instead of by vertex stream */
298 nvc0_hw_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
299 break;
300 default:
301 break;
302 }
303 if (hq->is64bit)
304 nouveau_fence_ref(nvc0->screen->base.fence.current, &hq->fence);
305 }
306
307 static bool
nvc0_hw_get_query_result(struct nvc0_context * nvc0,struct nvc0_query * q,bool wait,union pipe_query_result * result)308 nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
309 bool wait, union pipe_query_result *result)
310 {
311 struct nvc0_hw_query *hq = nvc0_hw_query(q);
312 uint64_t *res64 = (uint64_t*)result;
313 uint32_t *res32 = (uint32_t*)result;
314 uint8_t *res8 = (uint8_t*)result;
315 uint64_t *data64 = (uint64_t *)hq->data;
316 unsigned i;
317
318 if (hq->funcs && hq->funcs->get_query_result)
319 return hq->funcs->get_query_result(nvc0, hq, wait, result);
320
321 if (hq->state != NVC0_HW_QUERY_STATE_READY)
322 nvc0_hw_query_update(nvc0->base.client, q);
323
324 if (hq->state != NVC0_HW_QUERY_STATE_READY) {
325 if (!wait) {
326 if (hq->state != NVC0_HW_QUERY_STATE_FLUSHED) {
327 hq->state = NVC0_HW_QUERY_STATE_FLUSHED;
328 /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
329 PUSH_KICK(nvc0->base.pushbuf);
330 }
331 return false;
332 }
333 if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nvc0->base.client))
334 return false;
335 NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
336 }
337 hq->state = NVC0_HW_QUERY_STATE_READY;
338
339 switch (q->type) {
340 case PIPE_QUERY_GPU_FINISHED:
341 res8[0] = true;
342 break;
343 case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
344 res64[0] = hq->data[1] - hq->data[5];
345 break;
346 case PIPE_QUERY_OCCLUSION_PREDICATE:
347 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
348 res8[0] = hq->data[1] != hq->data[5];
349 break;
350 case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
351 case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
352 res64[0] = data64[0] - data64[2];
353 break;
354 case PIPE_QUERY_SO_STATISTICS:
355 res64[0] = data64[0] - data64[4];
356 res64[1] = data64[2] - data64[6];
357 break;
358 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
359 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
360 res8[0] = data64[0] != data64[2];
361 break;
362 case PIPE_QUERY_TIMESTAMP:
363 res64[0] = data64[1];
364 break;
365 case PIPE_QUERY_TIMESTAMP_DISJOINT:
366 res64[0] = 1000000000;
367 res8[8] = false;
368 break;
369 case PIPE_QUERY_TIME_ELAPSED:
370 res64[0] = data64[1] - data64[3];
371 break;
372 case PIPE_QUERY_PIPELINE_STATISTICS:
373 for (i = 0; i < 11; ++i)
374 res64[i] = data64[i * 2] - data64[24 + i * 2];
375 break;
376 case NVC0_HW_QUERY_TFB_BUFFER_OFFSET:
377 res32[0] = hq->data[1];
378 break;
379 default:
380 assert(0); /* can't happen, we don't create queries with invalid type */
381 return false;
382 }
383
384 return true;
385 }
386
387 static void
nvc0_hw_get_query_result_resource(struct nvc0_context * nvc0,struct nvc0_query * q,enum pipe_query_flags flags,enum pipe_query_value_type result_type,int index,struct pipe_resource * resource,unsigned offset)388 nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
389 struct nvc0_query *q,
390 enum pipe_query_flags flags,
391 enum pipe_query_value_type result_type,
392 int index,
393 struct pipe_resource *resource,
394 unsigned offset)
395 {
396 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
397 struct nvc0_hw_query *hq = nvc0_hw_query(q);
398 struct nv04_resource *buf = nv04_resource(resource);
399 unsigned qoffset = 0, stride;
400
401 assert(!hq->funcs || !hq->funcs->get_query_result);
402
403 if (index == -1) {
404 /* TODO: Use a macro to write the availability of the query */
405 if (hq->state != NVC0_HW_QUERY_STATE_READY)
406 nvc0_hw_query_update(nvc0->base.client, q);
407 uint32_t ready[2] = {hq->state == NVC0_HW_QUERY_STATE_READY};
408 nvc0->base.push_cb(&nvc0->base, buf, offset,
409 result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1,
410 ready);
411
412 util_range_add(&buf->base, &buf->valid_buffer_range, offset,
413 offset + (result_type >= PIPE_QUERY_TYPE_I64 ? 8 : 4));
414
415 nvc0_resource_validate(buf, NOUVEAU_BO_WR);
416
417 return;
418 }
419
420 /* If the fence guarding this query has not been emitted, that makes a lot
421 * of the following logic more complicated.
422 */
423 if (hq->is64bit && hq->fence->state < NOUVEAU_FENCE_STATE_EMITTED)
424 nouveau_fence_emit(hq->fence);
425
426 /* We either need to compute a 32- or 64-bit difference between 2 values,
427 * and then store the result as either a 32- or 64-bit value. As such let's
428 * treat all inputs as 64-bit (and just push an extra 0 for the 32-bit
429 * ones), and have one macro that clamps result to i32, u32, or just
430 * outputs the difference (no need to worry about 64-bit clamping).
431 */
432 if (hq->state != NVC0_HW_QUERY_STATE_READY)
433 nvc0_hw_query_update(nvc0->base.client, q);
434
435 if ((flags & PIPE_QUERY_WAIT) && hq->state != NVC0_HW_QUERY_STATE_READY)
436 nvc0_hw_query_fifo_wait(nvc0, q);
437
438 nouveau_pushbuf_space(push, 32, 2, 3);
439 PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
440 PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
441 BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 9);
442 switch (q->type) {
443 case PIPE_QUERY_OCCLUSION_PREDICATE:
444 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: /* XXX what if 64-bit? */
445 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
446 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
447 PUSH_DATA(push, 0x00000001);
448 break;
449 default:
450 if (result_type == PIPE_QUERY_TYPE_I32)
451 PUSH_DATA(push, 0x7fffffff);
452 else if (result_type == PIPE_QUERY_TYPE_U32)
453 PUSH_DATA(push, 0xffffffff);
454 else
455 PUSH_DATA(push, 0x00000000);
456 break;
457 }
458
459 switch (q->type) {
460 case PIPE_QUERY_SO_STATISTICS:
461 stride = 2;
462 break;
463 case PIPE_QUERY_PIPELINE_STATISTICS:
464 stride = 12;
465 break;
466 case PIPE_QUERY_TIME_ELAPSED:
467 case PIPE_QUERY_TIMESTAMP:
468 qoffset = 8;
469 FALLTHROUGH;
470 default:
471 assert(index == 0);
472 stride = 1;
473 break;
474 }
475
476 if (hq->is64bit || qoffset) {
477 nouveau_pushbuf_data(push, hq->bo, hq->offset + qoffset + 16 * index,
478 8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
479 if (q->type == PIPE_QUERY_TIMESTAMP) {
480 PUSH_DATA(push, 0);
481 PUSH_DATA(push, 0);
482 } else {
483 nouveau_pushbuf_data(push, hq->bo, hq->offset + qoffset +
484 16 * (index + stride),
485 8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
486 }
487 } else {
488 nouveau_pushbuf_data(push, hq->bo, hq->offset + 4,
489 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
490 PUSH_DATA(push, 0);
491 nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 + 4,
492 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
493 PUSH_DATA(push, 0);
494 }
495
496 if ((flags & PIPE_QUERY_WAIT) || hq->state == NVC0_HW_QUERY_STATE_READY) {
497 PUSH_DATA(push, 0);
498 PUSH_DATA(push, 0);
499 } else if (hq->is64bit) {
500 PUSH_DATA(push, hq->fence->sequence);
501 nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0,
502 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
503 } else {
504 PUSH_DATA(push, hq->sequence);
505 nouveau_pushbuf_data(push, hq->bo, hq->offset,
506 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
507 }
508 PUSH_DATAh(push, buf->address + offset);
509 PUSH_DATA (push, buf->address + offset);
510
511 util_range_add(&buf->base, &buf->valid_buffer_range, offset,
512 offset + (result_type >= PIPE_QUERY_TYPE_I64 ? 8 : 4));
513
514 nvc0_resource_validate(buf, NOUVEAU_BO_WR);
515 }
516
517 static const struct nvc0_query_funcs hw_query_funcs = {
518 .destroy_query = nvc0_hw_destroy_query,
519 .begin_query = nvc0_hw_begin_query,
520 .end_query = nvc0_hw_end_query,
521 .get_query_result = nvc0_hw_get_query_result,
522 .get_query_result_resource = nvc0_hw_get_query_result_resource,
523 };
524
525 struct nvc0_query *
nvc0_hw_create_query(struct nvc0_context * nvc0,unsigned type,unsigned index)526 nvc0_hw_create_query(struct nvc0_context *nvc0, unsigned type, unsigned index)
527 {
528 struct nvc0_hw_query *hq;
529 struct nvc0_query *q;
530 unsigned space = NVC0_HW_QUERY_ALLOC_SPACE;
531
532 hq = nvc0_hw_sm_create_query(nvc0, type);
533 if (hq) {
534 hq->base.funcs = &hw_query_funcs;
535 return (struct nvc0_query *)hq;
536 }
537
538 hq = nvc0_hw_metric_create_query(nvc0, type);
539 if (hq) {
540 hq->base.funcs = &hw_query_funcs;
541 return (struct nvc0_query *)hq;
542 }
543
544 hq = CALLOC_STRUCT(nvc0_hw_query);
545 if (!hq)
546 return NULL;
547
548 q = &hq->base;
549 q->funcs = &hw_query_funcs;
550 q->type = type;
551 q->index = index;
552
553 switch (q->type) {
554 case PIPE_QUERY_OCCLUSION_COUNTER:
555 case PIPE_QUERY_OCCLUSION_PREDICATE:
556 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
557 hq->rotate = 32;
558 space = NVC0_HW_QUERY_ALLOC_SPACE;
559 break;
560 case PIPE_QUERY_PIPELINE_STATISTICS:
561 hq->is64bit = true;
562 space = 512;
563 break;
564 case PIPE_QUERY_SO_STATISTICS:
565 hq->is64bit = true;
566 space = 64;
567 break;
568 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
569 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
570 case PIPE_QUERY_PRIMITIVES_GENERATED:
571 case PIPE_QUERY_PRIMITIVES_EMITTED:
572 hq->is64bit = true;
573 space = 32;
574 break;
575 case PIPE_QUERY_TIME_ELAPSED:
576 case PIPE_QUERY_TIMESTAMP:
577 case PIPE_QUERY_TIMESTAMP_DISJOINT:
578 case PIPE_QUERY_GPU_FINISHED:
579 space = 32;
580 break;
581 case NVC0_HW_QUERY_TFB_BUFFER_OFFSET:
582 space = 16;
583 break;
584 default:
585 debug_printf("invalid query type: %u\n", type);
586 FREE(q);
587 return NULL;
588 }
589
590 if (!nvc0_hw_query_allocate(nvc0, q, space)) {
591 FREE(hq);
592 return NULL;
593 }
594
595 if (hq->rotate) {
596 /* we advance before query_begin ! */
597 hq->offset -= hq->rotate;
598 hq->data -= hq->rotate / sizeof(*hq->data);
599 } else
600 if (!hq->is64bit)
601 hq->data[0] = 0; /* initialize sequence */
602
603 return q;
604 }
605
/**
 * Enumerate driver-specific hardware queries.
 *
 * The id space is the SM queries followed by the metric queries. With a
 * NULL \p info the total number of queries is returned; otherwise the
 * request is forwarded to the SM or metric implementation, with \p id
 * rebased for the latter.
 *
 * \param screen  screen to query
 * \param id      index of the driver query
 * \param info    destination, or NULL to get the count
 * \return the total count when \p info is NULL, otherwise whatever the
 *         selected implementation returns
 */
int
nvc0_hw_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
                              struct pipe_driver_query_info *info)
{
   const int sm_count = nvc0_hw_sm_get_driver_query_info(screen, 0, NULL);
   const int metric_count =
      nvc0_hw_metric_get_driver_query_info(screen, 0, NULL);

   if (!info)
      return sm_count + metric_count;

   if (id < sm_count)
      return nvc0_hw_sm_get_driver_query_info(screen, id, info);

   /* ids past the SM range address the metric queries */
   return nvc0_hw_metric_get_driver_query_info(screen, id - sm_count, info);
}
625
626 void
nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf * push,struct nvc0_query * q,unsigned result_offset)627 nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *push,
628 struct nvc0_query *q, unsigned result_offset)
629 {
630 struct nvc0_hw_query *hq = nvc0_hw_query(q);
631
632 PUSH_REFN(push, hq->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
633 nouveau_pushbuf_data(push, hq->bo, hq->offset + result_offset, 4 |
634 NVC0_IB_ENTRY_1_NO_PREFETCH);
635 }
636
637 void
nvc0_hw_query_fifo_wait(struct nvc0_context * nvc0,struct nvc0_query * q)638 nvc0_hw_query_fifo_wait(struct nvc0_context *nvc0, struct nvc0_query *q)
639 {
640 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
641 struct nvc0_hw_query *hq = nvc0_hw_query(q);
642 unsigned offset = hq->offset;
643
644 /* ensure the query's fence has been emitted */
645 if (hq->is64bit && hq->fence->state < NOUVEAU_FENCE_STATE_EMITTED)
646 nouveau_fence_emit(hq->fence);
647
648 PUSH_SPACE(push, 5);
649 PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
650 BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
651 if (hq->is64bit) {
652 PUSH_DATAh(push, nvc0->screen->fence.bo->offset);
653 PUSH_DATA (push, nvc0->screen->fence.bo->offset);
654 PUSH_DATA (push, hq->fence->sequence);
655 } else {
656 PUSH_DATAh(push, hq->bo->offset + offset);
657 PUSH_DATA (push, hq->bo->offset + offset);
658 PUSH_DATA (push, hq->sequence);
659 }
660 PUSH_DATA (push, (1 << 12) |
661 NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL);
662 }
663