/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file v3d_job.c
 *
 * Functions for submitting V3D render jobs to the kernel.
 */

#include <xf86drm.h>
#include <libsync.h>
#include "v3d_context.h"
/* The OQ/semaphore packets are the same across V3D versions. */
#define V3D_VERSION 42
#include "broadcom/cle/v3dx_pack.h"
#include "broadcom/common/v3d_macros.h"
#include "util/hash_table.h"
#include "util/perf/cpu_trace.h"
#include "util/ralloc.h"
#include "util/set.h"
#include "broadcom/clif/clif_dump.h"

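/**
 * Frees the job: drops its BO references, removes it from the context's
 * job-tracking hash tables, releases its surfaces and CLs, and clears
 * v3d->job if this was the current job.
 */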
void
v3d_job_free(struct v3d_context *v3d, struct v3d_job *job)
{
        set_foreach(job->bos, entry) {
                struct v3d_bo *bo = (struct v3d_bo *)entry->key;
                v3d_bo_unreference(&bo);
        }

        _mesa_hash_table_remove_key(v3d->jobs, &job->key);

        if (job->write_prscs) {
                set_foreach(job->write_prscs, entry) {
                        const struct pipe_resource *prsc = entry->key;

                        _mesa_hash_table_remove_key(v3d->write_jobs, prsc);
                }
        }

        for (int i = 0; i < job->nr_cbufs; i++) {
                if (job->cbufs[i]) {
                        _mesa_hash_table_remove_key(v3d->write_jobs,
                                                    job->cbufs[i]->texture);
                        pipe_surface_reference(&job->cbufs[i], NULL);
                }
        }
        if (job->zsbuf) {
                struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
                if (rsc->separate_stencil)
                        _mesa_hash_table_remove_key(v3d->write_jobs,
                                                    &rsc->separate_stencil->base);

                _mesa_hash_table_remove_key(v3d->write_jobs,
                                            job->zsbuf->texture);
                pipe_surface_reference(&job->zsbuf, NULL);
        }
        if (job->bbuf)
                pipe_surface_reference(&job->bbuf, NULL);

        if (job->dbuf)
                pipe_surface_reference(&job->dbuf, NULL);

        if (v3d->job == job)
                v3d->job = NULL;

        v3d_destroy_cl(&job->bcl);
        v3d_destroy_cl(&job->rcl);
        v3d_destroy_cl(&job->indirect);
        v3d_bo_unreference(&job->tile_alloc);
        v3d_bo_unreference(&job->tile_state);

        ralloc_free(job);
}

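/**
 * Allocates a new, empty job: fresh CLs, an empty BO set, and an inverted
 * (empty) draw bounding box that draws will extend.
 */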
struct v3d_job *
v3d_job_create(struct v3d_context *v3d)
{
        struct v3d_job *job = rzalloc(v3d, struct v3d_job);

        job->v3d = v3d;

        v3d_init_cl(job, &job->bcl);
        v3d_init_cl(job, &job->rcl);
        v3d_init_cl(job, &job->indirect);

        job->draw_min_x = ~0;
        job->draw_min_y = ~0;
        job->draw_max_x = 0;
        job->draw_max_y = 0;

        job->bos = _mesa_set_create(job,
                                    _mesa_hash_pointer,
                                    _mesa_key_pointer_equal);
        return job;
}

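/**
 * Adds the BO to the set of BOs referenced by the job, taking a reference
 * and appending its handle to the (growable) handle array passed to the
 * submit ioctl.
 */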
void
v3d_job_add_bo(struct v3d_job *job, struct v3d_bo *bo)
{
        if (!bo)
                return;

        if (_mesa_set_search(job->bos, bo))
                return;

        v3d_bo_reference(bo);
        _mesa_set_add(job->bos, bo);
        job->referenced_size += bo->size;

        uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles;

        if (job->submit.bo_handle_count >= job->bo_handles_size) {
                job->bo_handles_size = MAX2(4, job->bo_handles_size * 2);
                bo_handles = reralloc(job, bo_handles,
                                      uint32_t, job->bo_handles_size);
                job->submit.bo_handles = (uintptr_t)(void *)bo_handles;
        }
        bo_handles[job->submit.bo_handle_count++] = bo->handle;
}

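/**
 * Records that the job writes to the resource, so that later flushes of
 * readers or writers of prsc can find this job in v3d->write_jobs.
 */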
void
v3d_job_add_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
{
        struct v3d_context *v3d = job->v3d;

        if (!job->write_prscs) {
                job->write_prscs = _mesa_set_create(job,
                                                    _mesa_hash_pointer,
                                                    _mesa_key_pointer_equal);
        }

        _mesa_set_add(job->write_prscs, prsc);
        _mesa_hash_table_insert(v3d->write_jobs, prsc, job);
}

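/**
 * Submits any jobs that reference the given BO, whether they read or write
 * it.
 */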
void
v3d_flush_jobs_using_bo(struct v3d_context *v3d, struct v3d_bo *bo)
{
        MESA_TRACE_FUNC();

        hash_table_foreach(v3d->jobs, entry) {
                struct v3d_job *job = entry->data;

                if (_mesa_set_search(job->bos, bo))
                        v3d_job_submit(v3d, job);
        }
}

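/**
 * Records a transform feedback write to the resource. TF writes are tracked
 * in a separate set because they may be synced with the hardware's "Wait
 * for TF" mechanism instead of a full job flush.
 */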
void
v3d_job_add_tf_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
{
        v3d_job_add_write_resource(job, prsc);

        if (!job->tf_write_prscs)
                job->tf_write_prscs = _mesa_pointer_set_create(job);

        _mesa_set_add(job->tf_write_prscs, prsc);
}

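/* Returns whether the job has a pending transform feedback write to prsc. */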
static bool
v3d_job_writes_resource_from_tf(struct v3d_job *job,
                                struct pipe_resource *prsc)
{
        if (!job->tf_enabled)
                return false;

        if (!job->tf_write_prscs)
                return false;

        return _mesa_set_search(job->tf_write_prscs, prsc) != NULL;
}

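/**
 * Submits the job (if any) that writes to the resource, subject to
 * flush_cond, and records any cross-pipeline (graphics vs. compute)
 * synchronization that the write requires.
 */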
void
v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
                                struct pipe_resource *prsc,
                                enum v3d_flush_cond flush_cond,
                                bool is_compute_pipeline)
{
        struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
                                                           prsc);
        if (!entry)
                return;

        struct v3d_resource *rsc = v3d_resource(prsc);

        /* We need to sync if the graphics pipeline reads a resource written
         * by the compute pipeline. For the opposite dependency (compute
         * reading a resource written by graphics) we instead force a flush
         * of the job.
         */
        if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) {
                v3d->sync_on_last_compute_job = true;
                rsc->compute_written = false;
        }
        if (is_compute_pipeline && rsc->bo != NULL && rsc->graphics_written) {
                flush_cond = V3D_FLUSH_ALWAYS;
                rsc->graphics_written = false;
        }

        struct v3d_job *job = entry->data;

        bool needs_flush;
        switch (flush_cond) {
        case V3D_FLUSH_ALWAYS:
                needs_flush = true;
                break;
        case V3D_FLUSH_NOT_CURRENT_JOB:
                needs_flush = !v3d->job || v3d->job != job;
                break;
        case V3D_FLUSH_DEFAULT:
        default:
                /* For writes from TF in the same job we use the "Wait for TF"
                 * feature provided by the hardware so we don't want to flush.
                 * The exception to this is when the caller is about to map the
                 * resource, since in that case we don't have a 'Wait for TF'
                 * command in the command stream. In this scenario the caller
                 * is expected to set 'always_flush' to True.
                 */
                needs_flush = !v3d_job_writes_resource_from_tf(job, prsc);
        }

        if (needs_flush) {
                MESA_TRACE_FUNC();
                v3d_job_submit(v3d, job);
        }
}

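/**
 * Submits any jobs reading from the resource, after first flushing the job
 * writing to it (if any). Typically called because the caller is about to
 * write to the resource.
 */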
void
v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
                                struct pipe_resource *prsc,
                                enum v3d_flush_cond flush_cond,
                                bool is_compute_pipeline)
{
        struct v3d_resource *rsc = v3d_resource(prsc);

        /* We only need to force the flush on TF writes, which is the only
         * case where we might skip the flush to use the 'Wait for TF'
         * command. Here we are flushing for a read, which means that the
         * caller intends to write to the resource, so we don't care if
         * there was a previous TF write to it.
         */
        v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond,
                                        is_compute_pipeline);

        hash_table_foreach(v3d->jobs, entry) {
                struct v3d_job *job = entry->data;

                if (!_mesa_set_search(job->bos, rsc->bo))
                        continue;

                bool needs_flush;
                switch (flush_cond) {
                case V3D_FLUSH_NOT_CURRENT_JOB:
                        needs_flush = !v3d->job || v3d->job != job;
                        break;
                case V3D_FLUSH_ALWAYS:
                case V3D_FLUSH_DEFAULT:
                default:
                        needs_flush = true;
                }

                if (needs_flush) {
                        MESA_TRACE_FUNC();
                        v3d_job_submit(v3d, job);
                }

                /* Reminder: v3d->jobs is safe to keep iterating even
                 * after deletion of an entry.
                 */
                continue;
        }
}

/**
 * Returns a v3d_job structure for tracking V3D rendering to a particular FBO.
 *
 * If we've already started rendering to this FBO, then return the same job,
 * otherwise make a new one. If we're beginning rendering to an FBO, make
 * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
 * have been flushed.
 */
struct v3d_job *
v3d_get_job(struct v3d_context *v3d,
            uint32_t nr_cbufs,
            struct pipe_surface **cbufs,
            struct pipe_surface *zsbuf,
            struct pipe_surface *bbuf)
{
        /* Return the existing job for this FBO if we have one */
        struct v3d_job_key local_key = {
                .cbufs = {
                        cbufs[0],
                        cbufs[1],
                        cbufs[2],
                        cbufs[3],
                        cbufs[4],
                        cbufs[5],
                        cbufs[6],
                        cbufs[7],
                },
                .zsbuf = zsbuf,
                .bbuf = bbuf,
        };
        struct hash_entry *entry = _mesa_hash_table_search(v3d->jobs,
                                                           &local_key);
        if (entry)
                return entry->data;

        /* Creating a new job. Make sure that any previous jobs reading or
         * writing these buffers are flushed.
         */
        struct v3d_job *job = v3d_job_create(v3d);
        job->nr_cbufs = nr_cbufs;

        for (int i = 0; i < job->nr_cbufs; i++) {
                if (cbufs[i]) {
                        v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,
                                                        V3D_FLUSH_DEFAULT,
                                                        false);
                        pipe_surface_reference(&job->cbufs[i], cbufs[i]);

                        if (cbufs[i]->texture->nr_samples > 1)
                                job->msaa = true;
                }
        }
        if (zsbuf) {
                v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,
                                                V3D_FLUSH_DEFAULT,
                                                false);
                pipe_surface_reference(&job->zsbuf, zsbuf);
                if (zsbuf->texture->nr_samples > 1)
                        job->msaa = true;
        }
        if (bbuf) {
                pipe_surface_reference(&job->bbuf, bbuf);
                if (bbuf->texture->nr_samples > 1)
                        job->msaa = true;
        }

        for (int i = 0; i < job->nr_cbufs; i++) {
                if (cbufs[i])
                        _mesa_hash_table_insert(v3d->write_jobs,
                                                cbufs[i]->texture, job);
        }
        if (zsbuf) {
                _mesa_hash_table_insert(v3d->write_jobs, zsbuf->texture, job);

                struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
                if (rsc->separate_stencil) {
                        v3d_flush_jobs_reading_resource(v3d,
                                                        &rsc->separate_stencil->base,
                                                        V3D_FLUSH_DEFAULT,
                                                        false);
                        _mesa_hash_table_insert(v3d->write_jobs,
                                                &rsc->separate_stencil->base,
                                                job);
                }
        }

        /* By default we disable double buffer, but we allow it to be enabled
         * later on (except for MSAA jobs) if we don't find any other reason
         * to disable it.
         */
        job->can_use_double_buffer = !job->msaa && V3D_DBG(DOUBLE_BUFFER);
        job->double_buffer = false;

        memcpy(&job->key, &local_key, sizeof(local_key));
        _mesa_hash_table_insert(v3d->jobs, &job->key, job);

        return job;
}

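/**
 * Returns the job for the context's current framebuffer state, creating it
 * and computing its tile setup (and initial TLB clears for uninitialized
 * buffers) if there is no current job.
 */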
struct v3d_job *
v3d_get_job_for_fbo(struct v3d_context *v3d)
{
        if (v3d->job)
                return v3d->job;

        uint32_t nr_cbufs = v3d->framebuffer.nr_cbufs;
        struct pipe_surface **cbufs = v3d->framebuffer.cbufs;
        struct pipe_surface *zsbuf = v3d->framebuffer.zsbuf;
        struct v3d_job *job = v3d_get_job(v3d, nr_cbufs, cbufs, zsbuf, NULL);

        if (v3d->framebuffer.samples >= 1) {
                job->msaa = true;
                job->double_buffer = false;
        }

        v3d_get_tile_buffer_size(&v3d->screen->devinfo,
                                 job->msaa, job->double_buffer,
                                 job->nr_cbufs, job->cbufs, job->bbuf,
                                 &job->tile_desc.width,
                                 &job->tile_desc.height,
                                 &job->internal_bpp);

        /* The dirty flags are tracking what's been updated while v3d->job has
         * been bound, so set them all to ~0 when switching between jobs. We
         * also need to reset all state at the start of rendering.
         */
        v3d->dirty = ~0;

        /* If we're binding to uninitialized buffers, no need to load their
         * contents before drawing.
         */
        for (int i = 0; i < nr_cbufs; i++) {
                if (cbufs[i]) {
                        struct v3d_resource *rsc = v3d_resource(cbufs[i]->texture);
                        if (!rsc->writes)
                                job->clear_tlb |= PIPE_CLEAR_COLOR0 << i;
                }
        }

        if (zsbuf) {
                struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
                if (!rsc->writes)
                        job->clear_tlb |= PIPE_CLEAR_DEPTH;

                if (rsc->separate_stencil)
                        rsc = rsc->separate_stencil;

                if (!rsc->writes)
                        job->clear_tlb |= PIPE_CLEAR_STENCIL;
        }

        job->tile_desc.draw_x = DIV_ROUND_UP(v3d->framebuffer.width,
                                             job->tile_desc.width);
        job->tile_desc.draw_y = DIV_ROUND_UP(v3d->framebuffer.height,
                                             job->tile_desc.height);

        v3d->job = job;

        return job;
}

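/* Dumps the job's CLs and referenced BOs in CLIF format when CL/CLIF
 * debugging is enabled.
 */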
static void
v3d_clif_dump(struct v3d_context *v3d, struct v3d_job *job)
{
        if (!(V3D_DBG(CL) ||
              V3D_DBG(CL_NO_BIN) ||
              V3D_DBG(CLIF)))
                return;

        struct clif_dump *clif = clif_dump_init(&v3d->screen->devinfo,
                                                stderr,
                                                V3D_DBG(CL) ||
                                                V3D_DBG(CL_NO_BIN),
                                                V3D_DBG(CL_NO_BIN));

        set_foreach(job->bos, entry) {
                struct v3d_bo *bo = (void *)entry->key;
                char *name = ralloc_asprintf(NULL, "%s_0x%x",
                                             bo->name, bo->offset);

                v3d_bo_map(bo);
                clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);

                ralloc_free(name);
        }

        clif_dump(clif, &job->submit);

        clif_dump_destroy(clif);
}

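/* Stalls until the primitive counts BO is idle, then accumulates the GPU
 * counters into the context's query totals and streamout target offsets.
 */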
static void
v3d_read_and_accumulate_primitive_counters(struct v3d_context *v3d)
{
        assert(v3d->prim_counts);

        perf_debug("stalling on TF counts readback\n");
        struct v3d_resource *rsc = v3d_resource(v3d->prim_counts);
        if (v3d_bo_wait(rsc->bo, OS_TIMEOUT_INFINITE, "prim-counts")) {
                uint32_t *map = v3d_bo_map(rsc->bo) + v3d->prim_counts_offset;
                v3d->tf_prims_generated += map[V3D_PRIM_COUNTS_TF_WRITTEN];
                /* When we only have a vertex shader with no primitive
                 * restart, we determine the primitive count on the CPU,
                 * so don't update it here again.
                 */
                if (v3d->prog.gs || v3d->prim_restart) {
                        v3d->prims_generated += map[V3D_PRIM_COUNTS_WRITTEN];
                        uint8_t prim_mode =
                                v3d->prog.gs ? v3d->prog.gs->prog_data.gs->out_prim_type
                                             : v3d->prim_mode;
                        uint32_t vertices_written =
                                map[V3D_PRIM_COUNTS_TF_WRITTEN] * mesa_vertices_per_prim(prim_mode);
                        for (int i = 0; i < v3d->streamout.num_targets; i++) {
                                v3d_stream_output_target(v3d->streamout.targets[i])->offset +=
                                        vertices_written;
                        }
                }
        }
}

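/* Allocates the tile allocation and tile state (TSDA) BOs consumed by the
 * PTB during binning for this job.
 */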
static void
alloc_tile_state(struct v3d_job *job)
{
        assert(!job->tile_alloc && !job->tile_state);

        /* The PTB will request the tile alloc initial size per tile at start
         * of tile binning.
         */
        uint32_t tile_alloc_size =
                MAX2(job->num_layers, 1) * job->tile_desc.draw_x *
                job->tile_desc.draw_y * 64;

        /* The PTB allocates in aligned 4k chunks after the initial setup. */
        tile_alloc_size = align(tile_alloc_size, 4096);

        /* Include the first two chunk allocations that the PTB does so that
         * we definitely clear the OOM condition before triggering one (the HW
         * won't trigger OOM during the first allocations).
         */
        tile_alloc_size += 8192;

        /* For performance, allocate some extra initial memory after the PTB's
         * minimal allocations, so that we hopefully don't have to block the
         * GPU on the kernel handling an OOM signal.
         */
        tile_alloc_size += 512 * 1024;

        job->tile_alloc = v3d_bo_alloc(job->v3d->screen, tile_alloc_size,
                                       "tile_alloc");
        uint32_t tsda_per_tile_size = 256;
        job->tile_state = v3d_bo_alloc(job->v3d->screen,
                                       MAX2(job->num_layers, 1) *
                                       job->tile_desc.draw_y *
                                       job->tile_desc.draw_x *
                                       tsda_per_tile_size,
                                       "TSDA");
}

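/* Enables double-buffer tile mode at submit time if the job qualifies
 * (non-MSAA, no tile loads, has tile stores, and an acceptable
 * double-buffer score).
 */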
static void
enable_double_buffer_mode(struct v3d_job *job)
{
        /* Don't enable if we have seen incompatibilities */
        if (!job->can_use_double_buffer)
                return;

        /* For now we only allow double buffer via envvar and only for jobs
         * that are not MSAA, which is incompatible.
         */
        assert(V3D_DBG(DOUBLE_BUFFER) && !job->msaa);

        /* Tile loads are serialized against stores, in which case we don't
         * get any benefit from enabling double-buffer and would just pay the
         * price of a smaller tile size instead. Similarly, we only benefit
         * from double-buffer if we have tile stores, as the point of this
         * mode is to execute rendering of a new tile while we store the
         * previous one to hide latency on the tile store operation.
         */
        if (job->load)
                return;

        if (!job->store)
                return;

        if (!v3d_double_buffer_score_ok(&job->double_buffer_score))
                return;

        /* Enable double-buffer mode.
         *
         * This will reduce the tile size, so we need to recompute state
         * that depends on it and rewrite the TILE_BINNING_MODE_CFG packet
         * we emitted earlier in the CL.
         */
        job->double_buffer = true;
        v3d_get_tile_buffer_size(&job->v3d->screen->devinfo,
                                 job->msaa, job->double_buffer,
                                 job->nr_cbufs, job->cbufs, job->bbuf,
                                 &job->tile_desc.width, &job->tile_desc.height,
                                 &job->internal_bpp);

        job->tile_desc.draw_x = DIV_ROUND_UP(job->draw_width,
                                             job->tile_desc.width);
        job->tile_desc.draw_y = DIV_ROUND_UP(job->draw_height,
                                             job->tile_desc.height);

        struct v3d_device_info *devinfo = &job->v3d->screen->devinfo;
        v3d_X(devinfo, job_emit_enable_double_buffer)(job);
}

/**
 * Submits the job to the kernel and then frees it.
 */
void
v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)
{
        struct v3d_screen *screen = v3d->screen;
        struct v3d_device_info *devinfo = &screen->devinfo;

        MESA_TRACE_FUNC();

        if (!job->needs_flush)
                goto done;

        /* The GL_PRIMITIVES_GENERATED query is included with
         * OES_geometry_shader.
         */
        job->needs_primitives_generated =
                v3d->n_primitives_generated_queries_in_flight > 0 &&
                v3d->prog.gs;

        if (job->needs_primitives_generated)
                v3d_ensure_prim_counts_allocated(v3d);

        enable_double_buffer_mode(job);

        alloc_tile_state(job);

        v3d_X(devinfo, emit_rcl)(job);

        if (cl_offset(&job->bcl) > 0)
                v3d_X(devinfo, bcl_epilogue)(v3d, job);

        if (v3d->in_fence_fd >= 0) {
                /* pipe_caps.native_fence */
                if (drmSyncobjImportSyncFile(v3d->fd, v3d->in_syncobj,
                                             v3d->in_fence_fd)) {
                        fprintf(stderr, "Failed to import native fence.\n");
                } else {
                        job->submit.in_sync_bcl = v3d->in_syncobj;
                }
                close(v3d->in_fence_fd);
                v3d->in_fence_fd = -1;
        } else {
                /* While the RCL will implicitly depend on the last RCL to have
                 * finished, we also need to block on any previous TFU job we
                 * may have dispatched.
                 */
                job->submit.in_sync_rcl = v3d->out_sync;
        }

        /* Update the sync object for the last rendering by our context. */
        job->submit.out_sync = v3d->out_sync;

        job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
        job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);

        if (v3d->active_perfmon) {
                assert(screen->has_perfmon);
                job->submit.perfmon_id = v3d->active_perfmon->kperfmon_id;
        }

        /* If we are submitting a job with a different perfmon, we need to
         * ensure the previous one fully finishes before starting this one;
         * otherwise it would wrongly mix counter results.
         */
        if (v3d->active_perfmon != v3d->last_perfmon) {
                v3d->last_perfmon = v3d->active_perfmon;
                job->submit.in_sync_bcl = v3d->out_sync;
        }

        job->submit.flags = 0;
        if (job->tmu_dirty_rcl && screen->has_cache_flush)
                job->submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;

        /* On V3D 4.1, the tile alloc/state setup moved to register writes
         * instead of binner packets.
         */
        if (devinfo->ver >= 42) {
                v3d_job_add_bo(job, job->tile_alloc);
                job->submit.qma = job->tile_alloc->offset;
                job->submit.qms = job->tile_alloc->size;

                v3d_job_add_bo(job, job->tile_state);
                job->submit.qts = job->tile_state->offset;
        }

        v3d_clif_dump(v3d, job);

        if (!V3D_DBG(NORAST)) {
                int ret;
                ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit);
                static bool warned = false;
                if (ret && !warned) {
                        fprintf(stderr, "Draw call returned %s. "
                                "Expect corruption.\n", strerror(errno));
                        warned = true;
                } else if (!ret) {
                        if (v3d->active_perfmon)
                                v3d->active_perfmon->job_submitted = true;
                        if (V3D_DBG(SYNC)) {
                                drmSyncobjWait(v3d->fd, &v3d->out_sync, 1, INT64_MAX,
                                               DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
                        }
                }

                /* If we are submitting a job in the middle of transform
                 * feedback or there is a primitives generated query with a
                 * geometry shader then we need to read the primitive counts
                 * and accumulate them, otherwise they will be reset at the
                 * start of the next draw when we emit the Tile Binning Mode
                 * Configuration packet.
                 *
                 * If the job doesn't have any TF draw calls, then we know
                 * the primitive count must be zero and we can skip stalling
                 * for this. This also fixes a problem because it seems that
                 * in this scenario the counters are not reset with the Tile
                 * Binning Mode Configuration packet, which would translate
                 * to us reading an obsolete (possibly non-zero) value from
                 * the GPU counters.
                 */
                if (job->needs_primitives_generated ||
                    (v3d->streamout.num_targets &&
                     job->tf_draw_calls_queued > 0))
                        v3d_read_and_accumulate_primitive_counters(v3d);
        }

done:
        v3d_job_free(v3d, job);
}

DERIVE_HASH_TABLE(v3d_job_key);

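/**
 * Sets up the context's hash tables for tracking jobs: v3d->jobs keyed by
 * FBO state and v3d->write_jobs keyed by written resource.
 */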
void
v3d_job_init(struct v3d_context *v3d)
{
        v3d->jobs = v3d_job_key_table_create(v3d);
        v3d->write_jobs = _mesa_hash_table_create(v3d,
                                                  _mesa_hash_pointer,
                                                  _mesa_key_pointer_equal);
}