1 /*
2 * Copyright © 2014-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file v3d_job.c
25 *
26 * Functions for submitting V3D render jobs to the kernel.
27 */
28
29 #include <xf86drm.h>
30 #include "v3d_context.h"
31 /* The OQ/semaphore packets are the same across V3D versions. */
32 #define V3D_VERSION 33
33 #include "broadcom/cle/v3dx_pack.h"
34 #include "broadcom/common/v3d_macros.h"
35 #include "util/hash_table.h"
36 #include "util/ralloc.h"
37 #include "util/set.h"
38 #include "broadcom/clif/clif_dump.h"
39
/**
 * Releases all resources tracked by @job and frees it.
 *
 * Drops the job's references on its BOs, unregisters it from the
 * context's job hash tables (v3d->jobs and v3d->write_jobs), releases
 * its surface references and CLs, and finally frees the job allocation.
 */
void
v3d_job_free(struct v3d_context *v3d, struct v3d_job *job)
{
        /* Drop the reference this job held on every BO it used. */
        set_foreach(job->bos, entry) {
                struct v3d_bo *bo = (struct v3d_bo *)entry->key;
                v3d_bo_unreference(&bo);
        }

        _mesa_hash_table_remove_key(v3d->jobs, &job->key);

        /* Unregister this job as the writer of each resource it wrote. */
        if (job->write_prscs) {
                set_foreach(job->write_prscs, entry) {
                        const struct pipe_resource *prsc = entry->key;

                        _mesa_hash_table_remove_key(v3d->write_jobs, prsc);
                }
        }

        for (int i = 0; i < job->nr_cbufs; i++) {
                if (job->cbufs[i]) {
                        _mesa_hash_table_remove_key(v3d->write_jobs,
                                                    job->cbufs[i]->texture);
                        pipe_surface_reference(&job->cbufs[i], NULL);
                }
        }
        if (job->zsbuf) {
                struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
                /* A separate stencil buffer is tracked as its own write
                 * resource, so drop that write_jobs entry too.
                 */
                if (rsc->separate_stencil)
                        _mesa_hash_table_remove_key(v3d->write_jobs,
                                                    &rsc->separate_stencil->base);

                _mesa_hash_table_remove_key(v3d->write_jobs,
                                            job->zsbuf->texture);
                pipe_surface_reference(&job->zsbuf, NULL);
        }
        if (job->bbuf)
                pipe_surface_reference(&job->bbuf, NULL);

        /* If this was the context's current job, clear it out. */
        if (v3d->job == job)
                v3d->job = NULL;

        v3d_destroy_cl(&job->bcl);
        v3d_destroy_cl(&job->rcl);
        v3d_destroy_cl(&job->indirect);
        v3d_bo_unreference(&job->tile_alloc);
        v3d_bo_unreference(&job->tile_state);

        ralloc_free(job);
}
89
90 struct v3d_job *
v3d_job_create(struct v3d_context * v3d)91 v3d_job_create(struct v3d_context *v3d)
92 {
93 struct v3d_job *job = rzalloc(v3d, struct v3d_job);
94
95 job->v3d = v3d;
96
97 v3d_init_cl(job, &job->bcl);
98 v3d_init_cl(job, &job->rcl);
99 v3d_init_cl(job, &job->indirect);
100
101 job->draw_min_x = ~0;
102 job->draw_min_y = ~0;
103 job->draw_max_x = 0;
104 job->draw_max_y = 0;
105
106 job->bos = _mesa_set_create(job,
107 _mesa_hash_pointer,
108 _mesa_key_pointer_equal);
109 return job;
110 }
111
112 void
v3d_job_add_bo(struct v3d_job * job,struct v3d_bo * bo)113 v3d_job_add_bo(struct v3d_job *job, struct v3d_bo *bo)
114 {
115 if (!bo)
116 return;
117
118 if (_mesa_set_search(job->bos, bo))
119 return;
120
121 v3d_bo_reference(bo);
122 _mesa_set_add(job->bos, bo);
123 job->referenced_size += bo->size;
124
125 uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles;
126
127 if (job->submit.bo_handle_count >= job->bo_handles_size) {
128 job->bo_handles_size = MAX2(4, job->bo_handles_size * 2);
129 bo_handles = reralloc(job, bo_handles,
130 uint32_t, job->bo_handles_size);
131 job->submit.bo_handles = (uintptr_t)(void *)bo_handles;
132 }
133 bo_handles[job->submit.bo_handle_count++] = bo->handle;
134 }
135
136 void
v3d_job_add_write_resource(struct v3d_job * job,struct pipe_resource * prsc)137 v3d_job_add_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
138 {
139 struct v3d_context *v3d = job->v3d;
140
141 if (!job->write_prscs) {
142 job->write_prscs = _mesa_set_create(job,
143 _mesa_hash_pointer,
144 _mesa_key_pointer_equal);
145 }
146
147 _mesa_set_add(job->write_prscs, prsc);
148 _mesa_hash_table_insert(v3d->write_jobs, prsc, job);
149 }
150
151 void
v3d_flush_jobs_using_bo(struct v3d_context * v3d,struct v3d_bo * bo)152 v3d_flush_jobs_using_bo(struct v3d_context *v3d, struct v3d_bo *bo)
153 {
154 hash_table_foreach(v3d->jobs, entry) {
155 struct v3d_job *job = entry->data;
156
157 if (_mesa_set_search(job->bos, bo))
158 v3d_job_submit(v3d, job);
159 }
160 }
161
162 void
v3d_job_add_tf_write_resource(struct v3d_job * job,struct pipe_resource * prsc)163 v3d_job_add_tf_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
164 {
165 v3d_job_add_write_resource(job, prsc);
166
167 if (!job->tf_write_prscs)
168 job->tf_write_prscs = _mesa_pointer_set_create(job);
169
170 _mesa_set_add(job->tf_write_prscs, prsc);
171 }
172
173 static bool
v3d_job_writes_resource_from_tf(struct v3d_job * job,struct pipe_resource * prsc)174 v3d_job_writes_resource_from_tf(struct v3d_job *job,
175 struct pipe_resource *prsc)
176 {
177 if (!job->tf_enabled)
178 return false;
179
180 if (!job->tf_write_prscs)
181 return false;
182
183 return _mesa_set_search(job->tf_write_prscs, prsc) != NULL;
184 }
185
/**
 * Flushes any job recorded as writing to @prsc, subject to @flush_cond.
 *
 * Also, when a graphics-pipeline caller touches a resource last written
 * by the compute pipeline, requests a sync on the last compute job.
 */
void
v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
                                struct pipe_resource *prsc,
                                enum v3d_flush_cond flush_cond,
                                bool is_compute_pipeline)
{
        struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
                                                           prsc);
        struct v3d_resource *rsc = v3d_resource(prsc);

        /* We need to sync if graphics pipeline reads a resource written
         * by the compute pipeline. The same would be needed for the case of
         * graphics-compute dependency but nowadays all compute jobs
         * are serialized with the previous submitted job.
         */
        if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) {
                v3d->sync_on_last_compute_job = true;
                rsc->compute_written = false;
        }

        /* No job is writing this resource, nothing to flush. */
        if (!entry)
                return;

        struct v3d_job *job = entry->data;

        bool needs_flush;
        switch (flush_cond) {
        case V3D_FLUSH_ALWAYS:
                needs_flush = true;
                break;
        case V3D_FLUSH_NOT_CURRENT_JOB:
                needs_flush = !v3d->job || v3d->job != job;
                break;
        case V3D_FLUSH_DEFAULT:
        default:
                /* For writes from TF in the same job we use the "Wait for TF"
                 * feature provided by the hardware so we don't want to flush.
                 * The exception to this is when the caller is about to map the
                 * resource since in that case we don't have a 'Wait for TF'
                 * command in the command stream. In this scenario the caller
                 * is expected to set 'always_flush' to True.
                 */
                needs_flush = !v3d_job_writes_resource_from_tf(job, prsc);
        }

        if (needs_flush)
                v3d_job_submit(v3d, job);
}
234
235 void
v3d_flush_jobs_reading_resource(struct v3d_context * v3d,struct pipe_resource * prsc,enum v3d_flush_cond flush_cond,bool is_compute_pipeline)236 v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
237 struct pipe_resource *prsc,
238 enum v3d_flush_cond flush_cond,
239 bool is_compute_pipeline)
240 {
241 struct v3d_resource *rsc = v3d_resource(prsc);
242
243 /* We only need to force the flush on TF writes, which is the only
244 * case where we might skip the flush to use the 'Wait for TF'
245 * command. Here we are flushing for a read, which means that the
246 * caller intends to write to the resource, so we don't care if
247 * there was a previous TF write to it.
248 */
249 v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond,
250 is_compute_pipeline);
251
252 hash_table_foreach(v3d->jobs, entry) {
253 struct v3d_job *job = entry->data;
254
255 if (!_mesa_set_search(job->bos, rsc->bo))
256 continue;
257
258 bool needs_flush;
259 switch (flush_cond) {
260 case V3D_FLUSH_NOT_CURRENT_JOB:
261 needs_flush = !v3d->job || v3d->job != job;
262 break;
263 case V3D_FLUSH_ALWAYS:
264 case V3D_FLUSH_DEFAULT:
265 default:
266 needs_flush = true;
267 }
268
269 if (needs_flush)
270 v3d_job_submit(v3d, job);
271
272 /* Reminder: v3d->jobs is safe to keep iterating even
273 * after deletion of an entry.
274 */
275 continue;
276 }
277 }
278
/**
 * Returns a v3d_job structure for tracking V3D rendering to a particular FBO.
 *
 * If we've already started rendering to this FBO, then return the same job,
 * otherwise make a new one.  If we're beginning rendering to an FBO, make
 * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
 * have been flushed.
 */
struct v3d_job *
v3d_get_job(struct v3d_context *v3d,
            uint32_t nr_cbufs,
            struct pipe_surface **cbufs,
            struct pipe_surface *zsbuf,
            struct pipe_surface *bbuf)
{
        /* Return the existing job for this FBO if we have one.
         *
         * NOTE(review): the key captures exactly 4 color buffers; this
         * assumes nr_cbufs never exceeds 4 — confirm against the
         * hardware's max render targets.
         */
        struct v3d_job_key local_key = {
                .cbufs = {
                        cbufs[0],
                        cbufs[1],
                        cbufs[2],
                        cbufs[3],
                },
                .zsbuf = zsbuf,
                .bbuf = bbuf,
        };
        struct hash_entry *entry = _mesa_hash_table_search(v3d->jobs,
                                                           &local_key);
        if (entry)
                return entry->data;

        /* Creating a new job. Make sure that any previous jobs reading or
         * writing these buffers are flushed.
         */
        struct v3d_job *job = v3d_job_create(v3d);
        job->nr_cbufs = nr_cbufs;

        for (int i = 0; i < job->nr_cbufs; i++) {
                if (cbufs[i]) {
                        v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,
                                                        V3D_FLUSH_DEFAULT,
                                                        false);
                        pipe_surface_reference(&job->cbufs[i], cbufs[i]);

                        if (cbufs[i]->texture->nr_samples > 1)
                                job->msaa = true;
                }
        }
        if (zsbuf) {
                v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,
                                                V3D_FLUSH_DEFAULT,
                                                false);
                pipe_surface_reference(&job->zsbuf, zsbuf);
                if (zsbuf->texture->nr_samples > 1)
                        job->msaa = true;
        }
        /* NOTE(review): bbuf is neither flushed for readers nor inserted
         * into write_jobs below — presumably a read-only source; confirm.
         */
        if (bbuf) {
                pipe_surface_reference(&job->bbuf, bbuf);
                if (bbuf->texture->nr_samples > 1)
                        job->msaa = true;
        }

        /* Register this job as the writer of each attached surface. */
        for (int i = 0; i < job->nr_cbufs; i++) {
                if (cbufs[i])
                        _mesa_hash_table_insert(v3d->write_jobs,
                                                cbufs[i]->texture, job);
        }
        if (zsbuf) {
                _mesa_hash_table_insert(v3d->write_jobs, zsbuf->texture, job);

                /* A separate stencil resource is flushed and tracked as a
                 * write on its own.
                 */
                struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
                if (rsc->separate_stencil) {
                        v3d_flush_jobs_reading_resource(v3d,
                                                        &rsc->separate_stencil->base,
                                                        V3D_FLUSH_DEFAULT,
                                                        false);
                        _mesa_hash_table_insert(v3d->write_jobs,
                                                &rsc->separate_stencil->base,
                                                job);
                }
        }

        /* The jobs table stores a pointer to the key, so it has to live in
         * the job itself.
         */
        memcpy(&job->key, &local_key, sizeof(local_key));
        _mesa_hash_table_insert(v3d->jobs, &job->key, job);

        return job;
}
366
/**
 * Returns the job for the context's current framebuffer state, creating
 * a new one (and making it current) if the context has no active job.
 *
 * New jobs get their tile-buffer sizing, initial clear flags for
 * never-written attachments, and draw tile counts computed here.
 */
struct v3d_job *
v3d_get_job_for_fbo(struct v3d_context *v3d)
{
        if (v3d->job)
                return v3d->job;

        uint32_t nr_cbufs = v3d->framebuffer.nr_cbufs;
        struct pipe_surface **cbufs = v3d->framebuffer.cbufs;
        struct pipe_surface *zsbuf = v3d->framebuffer.zsbuf;
        struct v3d_job *job = v3d_get_job(v3d, nr_cbufs, cbufs, zsbuf, NULL);

        /* NOTE(review): ">= 1" marks even single-sampled framebuffers as
         * MSAA; confirm whether this is intentional or should be "> 1".
         */
        if (v3d->framebuffer.samples >= 1)
                job->msaa = true;

        v3d_get_tile_buffer_size(job->msaa, job->nr_cbufs,
                                 job->cbufs, job->bbuf,
                                 &job->tile_width,
                                 &job->tile_height,
                                 &job->internal_bpp);

        /* The dirty flags are tracking what's been updated while v3d->job has
         * been bound, so set them all to ~0 when switching between jobs. We
         * also need to reset all state at the start of rendering.
         */
        v3d->dirty = ~0;

        /* If we're binding to uninitialized buffers, no need to load their
         * contents before drawing.
         */
        for (int i = 0; i < nr_cbufs; i++) {
                if (cbufs[i]) {
                        struct v3d_resource *rsc = v3d_resource(cbufs[i]->texture);
                        if (!rsc->writes)
                                job->clear |= PIPE_CLEAR_COLOR0 << i;
                }
        }

        if (zsbuf) {
                struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
                if (!rsc->writes)
                        job->clear |= PIPE_CLEAR_DEPTH;

                /* Stencil may live in its own resource; check that one's
                 * write status for the stencil clear.
                 */
                if (rsc->separate_stencil)
                        rsc = rsc->separate_stencil;

                if (!rsc->writes)
                        job->clear |= PIPE_CLEAR_STENCIL;
        }

        job->draw_tiles_x = DIV_ROUND_UP(v3d->framebuffer.width,
                                         job->tile_width);
        job->draw_tiles_y = DIV_ROUND_UP(v3d->framebuffer.height,
                                         job->tile_height);

        v3d->job = job;

        return job;
}
425
426 static void
v3d_clif_dump(struct v3d_context * v3d,struct v3d_job * job)427 v3d_clif_dump(struct v3d_context *v3d, struct v3d_job *job)
428 {
429 if (!(unlikely(V3D_DEBUG & (V3D_DEBUG_CL |
430 V3D_DEBUG_CL_NO_BIN |
431 V3D_DEBUG_CLIF))))
432 return;
433
434 struct clif_dump *clif = clif_dump_init(&v3d->screen->devinfo,
435 stderr,
436 V3D_DEBUG & (V3D_DEBUG_CL |
437 V3D_DEBUG_CL_NO_BIN),
438 V3D_DEBUG & V3D_DEBUG_CL_NO_BIN);
439
440 set_foreach(job->bos, entry) {
441 struct v3d_bo *bo = (void *)entry->key;
442 char *name = ralloc_asprintf(NULL, "%s_0x%x",
443 bo->name, bo->offset);
444
445 v3d_bo_map(bo);
446 clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);
447
448 ralloc_free(name);
449 }
450
451 clif_dump(clif, &job->submit);
452
453 clif_dump_destroy(clif);
454 }
455
456 static void
v3d_read_and_accumulate_primitive_counters(struct v3d_context * v3d)457 v3d_read_and_accumulate_primitive_counters(struct v3d_context *v3d)
458 {
459 assert(v3d->prim_counts);
460
461 perf_debug("stalling on TF counts readback\n");
462 struct v3d_resource *rsc = v3d_resource(v3d->prim_counts);
463 if (v3d_bo_wait(rsc->bo, PIPE_TIMEOUT_INFINITE, "prim-counts")) {
464 uint32_t *map = v3d_bo_map(rsc->bo) + v3d->prim_counts_offset;
465 v3d->tf_prims_generated += map[V3D_PRIM_COUNTS_TF_WRITTEN];
466 /* When we only have a vertex shader we determine the primitive
467 * count in the CPU so don't update it here again.
468 */
469 if (v3d->prog.gs)
470 v3d->prims_generated += map[V3D_PRIM_COUNTS_WRITTEN];
471 }
472 }
473
/**
 * Submits the job to the kernel and then frees it.
 *
 * Jobs that recorded nothing needing a flush skip the kernel submission
 * entirely and are just freed.
 */
void
v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)
{
        struct v3d_screen *screen = v3d->screen;

        if (!job->needs_flush)
                goto done;

        /* The GL_PRIMITIVES_GENERATED query is included with
         * OES_geometry_shader.
         */
        job->needs_primitives_generated =
                v3d->n_primitives_generated_queries_in_flight > 0 &&
                v3d->prog.gs;

        if (job->needs_primitives_generated)
                v3d_ensure_prim_counts_allocated(v3d);

        /* Emit the render CL for the hardware generation we target. */
        if (screen->devinfo.ver >= 41)
                v3d41_emit_rcl(job);
        else
                v3d33_emit_rcl(job);

        /* Only close out the binner CL if anything was recorded in it. */
        if (cl_offset(&job->bcl) > 0) {
                if (screen->devinfo.ver >= 41)
                        v3d41_bcl_epilogue(v3d, job);
                else
                        v3d33_bcl_epilogue(v3d, job);
        }

        /* While the RCL will implicitly depend on the last RCL to have
         * finished, we also need to block on any previous TFU job we may have
         * dispatched.
         */
        job->submit.in_sync_rcl = v3d->out_sync;

        /* Update the sync object for the last rendering by our context. */
        job->submit.out_sync = v3d->out_sync;

        job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
        job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);

        if (v3d->active_perfmon) {
                assert(screen->has_perfmon);
                job->submit.perfmon_id = v3d->active_perfmon->kperfmon_id;
        }

        /* If we are submitting a job with a different perfmon, we need to
         * ensure the previous one fully finishes before starting this;
         * otherwise it would wrongly mix counter results.
         */
        if (v3d->active_perfmon != v3d->last_perfmon) {
                v3d->last_perfmon = v3d->active_perfmon;
                job->submit.in_sync_bcl = v3d->out_sync;
        }

        job->submit.flags = 0;
        if (job->tmu_dirty_rcl && screen->has_cache_flush)
                job->submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;

        /* On V3D 4.1, the tile alloc/state setup moved to register writes
         * instead of binner packets.
         */
        if (screen->devinfo.ver >= 41) {
                v3d_job_add_bo(job, job->tile_alloc);
                job->submit.qma = job->tile_alloc->offset;
                job->submit.qms = job->tile_alloc->size;

                v3d_job_add_bo(job, job->tile_state);
                job->submit.qts = job->tile_state->offset;
        }

        v3d_clif_dump(v3d, job);

        if (!(unlikely(V3D_DEBUG & V3D_DEBUG_NORAST))) {
                int ret;

                ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit);
                /* Only warn once about failed submits to avoid log spam. */
                static bool warned = false;
                if (ret && !warned) {
                        fprintf(stderr, "Draw call returned %s. "
                                        "Expect corruption.\n", strerror(errno));
                        warned = true;
                } else if (!ret) {
                        if (v3d->active_perfmon)
                                v3d->active_perfmon->job_submitted = true;
                }

                /* If we are submitting a job in the middle of transform
                 * feedback or there is a primitives generated query with a
                 * geometry shader then we need to read the primitive counts
                 * and accumulate them, otherwise they will be reset at the
                 * start of the next draw when we emit the Tile Binning Mode
                 * Configuration packet.
                 *
                 * If the job doesn't have any TF draw calls, then we know
                 * the primitive count must be zero and we can skip stalling
                 * for this. This also fixes a problem because it seems that
                 * in this scenario the counters are not reset with the Tile
                 * Binning Mode Configuration packet, which would translate
                 * to us reading an obsolete (possibly non-zero) value from
                 * the GPU counters.
                 */
                if (job->needs_primitives_generated ||
                    (v3d->streamout.num_targets &&
                     job->tf_draw_calls_queued > 0))
                        v3d_read_and_accumulate_primitive_counters(v3d);
        }

done:
        v3d_job_free(v3d, job);
}
589
590 static bool
v3d_job_compare(const void * a,const void * b)591 v3d_job_compare(const void *a, const void *b)
592 {
593 return memcmp(a, b, sizeof(struct v3d_job_key)) == 0;
594 }
595
/* Hash callback for the jobs hash table: hashes the whole v3d_job_key. */
static uint32_t
v3d_job_hash(const void *key)
{
        return _mesa_hash_data(key, sizeof(struct v3d_job_key));
}
601
602 void
v3d_job_init(struct v3d_context * v3d)603 v3d_job_init(struct v3d_context *v3d)
604 {
605 v3d->jobs = _mesa_hash_table_create(v3d,
606 v3d_job_hash,
607 v3d_job_compare);
608 v3d->write_jobs = _mesa_hash_table_create(v3d,
609 _mesa_hash_pointer,
610 _mesa_key_pointer_equal);
611 }
612
613