• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2014-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /** @file v3d_job.c
25  *
26  * Functions for submitting V3D render jobs to the kernel.
27  */
28 
29 #include <xf86drm.h>
30 #include <libsync.h>
31 #include "v3d_context.h"
32 /* The OQ/semaphore packets are the same across V3D versions. */
33 #define V3D_VERSION 42
34 #include "broadcom/cle/v3dx_pack.h"
35 #include "broadcom/common/v3d_macros.h"
36 #include "util/hash_table.h"
37 #include "util/perf/cpu_trace.h"
38 #include "util/ralloc.h"
39 #include "util/set.h"
40 #include "broadcom/clif/clif_dump.h"
41 
42 void
v3d_job_free(struct v3d_context * v3d,struct v3d_job * job)43 v3d_job_free(struct v3d_context *v3d, struct v3d_job *job)
44 {
45         set_foreach(job->bos, entry) {
46                 struct v3d_bo *bo = (struct v3d_bo *)entry->key;
47                 v3d_bo_unreference(&bo);
48         }
49 
50         _mesa_hash_table_remove_key(v3d->jobs, &job->key);
51 
52         if (job->write_prscs) {
53                 set_foreach(job->write_prscs, entry) {
54                         const struct pipe_resource *prsc = entry->key;
55 
56                         _mesa_hash_table_remove_key(v3d->write_jobs, prsc);
57                 }
58         }
59 
60         for (int i = 0; i < job->nr_cbufs; i++) {
61                 if (job->cbufs[i]) {
62                         _mesa_hash_table_remove_key(v3d->write_jobs,
63                                                     job->cbufs[i]->texture);
64                         pipe_surface_reference(&job->cbufs[i], NULL);
65                 }
66         }
67         if (job->zsbuf) {
68                 struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
69                 if (rsc->separate_stencil)
70                         _mesa_hash_table_remove_key(v3d->write_jobs,
71                                                     &rsc->separate_stencil->base);
72 
73                 _mesa_hash_table_remove_key(v3d->write_jobs,
74                                             job->zsbuf->texture);
75                 pipe_surface_reference(&job->zsbuf, NULL);
76         }
77         if (job->bbuf)
78                 pipe_surface_reference(&job->bbuf, NULL);
79 
80         if (job->dbuf)
81                 pipe_surface_reference(&job->dbuf, NULL);
82 
83         if (v3d->job == job)
84                 v3d->job = NULL;
85 
86         v3d_destroy_cl(&job->bcl);
87         v3d_destroy_cl(&job->rcl);
88         v3d_destroy_cl(&job->indirect);
89         v3d_bo_unreference(&job->tile_alloc);
90         v3d_bo_unreference(&job->tile_state);
91 
92         ralloc_free(job);
93 }
94 
95 struct v3d_job *
v3d_job_create(struct v3d_context * v3d)96 v3d_job_create(struct v3d_context *v3d)
97 {
98         struct v3d_job *job = rzalloc(v3d, struct v3d_job);
99 
100         job->v3d = v3d;
101 
102         v3d_init_cl(job, &job->bcl);
103         v3d_init_cl(job, &job->rcl);
104         v3d_init_cl(job, &job->indirect);
105 
106         job->draw_min_x = ~0;
107         job->draw_min_y = ~0;
108         job->draw_max_x = 0;
109         job->draw_max_y = 0;
110 
111         job->bos = _mesa_set_create(job,
112                                     _mesa_hash_pointer,
113                                     _mesa_key_pointer_equal);
114         return job;
115 }
116 
117 void
v3d_job_add_bo(struct v3d_job * job,struct v3d_bo * bo)118 v3d_job_add_bo(struct v3d_job *job, struct v3d_bo *bo)
119 {
120         if (!bo)
121                 return;
122 
123         if (_mesa_set_search(job->bos, bo))
124                 return;
125 
126         v3d_bo_reference(bo);
127         _mesa_set_add(job->bos, bo);
128         job->referenced_size += bo->size;
129 
130         uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles;
131 
132         if (job->submit.bo_handle_count >= job->bo_handles_size) {
133                 job->bo_handles_size = MAX2(4, job->bo_handles_size * 2);
134                 bo_handles = reralloc(job, bo_handles,
135                                       uint32_t, job->bo_handles_size);
136                 job->submit.bo_handles = (uintptr_t)(void *)bo_handles;
137         }
138         bo_handles[job->submit.bo_handle_count++] = bo->handle;
139 }
140 
141 void
v3d_job_add_write_resource(struct v3d_job * job,struct pipe_resource * prsc)142 v3d_job_add_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
143 {
144         struct v3d_context *v3d = job->v3d;
145 
146         if (!job->write_prscs) {
147                 job->write_prscs = _mesa_set_create(job,
148                                                     _mesa_hash_pointer,
149                                                     _mesa_key_pointer_equal);
150         }
151 
152         _mesa_set_add(job->write_prscs, prsc);
153         _mesa_hash_table_insert(v3d->write_jobs, prsc, job);
154 }
155 
156 void
v3d_flush_jobs_using_bo(struct v3d_context * v3d,struct v3d_bo * bo)157 v3d_flush_jobs_using_bo(struct v3d_context *v3d, struct v3d_bo *bo)
158 {
159         MESA_TRACE_FUNC();
160 
161         hash_table_foreach(v3d->jobs, entry) {
162                 struct v3d_job *job = entry->data;
163 
164                 if (_mesa_set_search(job->bos, bo))
165                         v3d_job_submit(v3d, job);
166         }
167 }
168 
169 void
v3d_job_add_tf_write_resource(struct v3d_job * job,struct pipe_resource * prsc)170 v3d_job_add_tf_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
171 {
172         v3d_job_add_write_resource(job, prsc);
173 
174         if (!job->tf_write_prscs)
175                 job->tf_write_prscs = _mesa_pointer_set_create(job);
176 
177         _mesa_set_add(job->tf_write_prscs, prsc);
178 }
179 
180 static bool
v3d_job_writes_resource_from_tf(struct v3d_job * job,struct pipe_resource * prsc)181 v3d_job_writes_resource_from_tf(struct v3d_job *job,
182                                 struct pipe_resource *prsc)
183 {
184         if (!job->tf_enabled)
185                 return false;
186 
187         if (!job->tf_write_prscs)
188                 return false;
189 
190         return _mesa_set_search(job->tf_write_prscs, prsc) != NULL;
191 }
192 
193 void
v3d_flush_jobs_writing_resource(struct v3d_context * v3d,struct pipe_resource * prsc,enum v3d_flush_cond flush_cond,bool is_compute_pipeline)194 v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
195                                 struct pipe_resource *prsc,
196                                 enum v3d_flush_cond flush_cond,
197                                 bool is_compute_pipeline)
198 {
199         struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
200                                                            prsc);
201         if (!entry)
202                 return;
203 
204         struct v3d_resource *rsc = v3d_resource(prsc);
205 
206         /* We need to sync if graphics pipeline reads a resource written
207          * by the compute pipeline. The same is needed for the case of
208          * graphics-compute dependency but flushing the job.
209          */
210         if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) {
211                 v3d->sync_on_last_compute_job = true;
212                 rsc->compute_written = false;
213         }
214         if (is_compute_pipeline && rsc->bo != NULL && rsc->graphics_written) {
215                 flush_cond = V3D_FLUSH_ALWAYS;
216                 rsc->graphics_written = false;
217         }
218 
219         struct v3d_job *job = entry->data;
220 
221         bool needs_flush;
222         switch (flush_cond) {
223         case V3D_FLUSH_ALWAYS:
224                 needs_flush = true;
225                 break;
226         case V3D_FLUSH_NOT_CURRENT_JOB:
227                 needs_flush = !v3d->job || v3d->job != job;
228                 break;
229         case V3D_FLUSH_DEFAULT:
230         default:
231                 /* For writes from TF in the same job we use the "Wait for TF"
232                  * feature provided by the hardware so we don't want to flush.
233                  * The exception to this is when the caller is about to map the
234                  * resource since in that case we don't have a 'Wait for TF'
235                  * command the in command stream. In this scenario the caller
236                  * is expected to set 'always_flush' to True.
237                  */
238                 needs_flush = !v3d_job_writes_resource_from_tf(job, prsc);
239         }
240 
241         if (needs_flush) {
242                 MESA_TRACE_FUNC();
243                 v3d_job_submit(v3d, job);
244         }
245 }
246 
247 void
v3d_flush_jobs_reading_resource(struct v3d_context * v3d,struct pipe_resource * prsc,enum v3d_flush_cond flush_cond,bool is_compute_pipeline)248 v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
249                                 struct pipe_resource *prsc,
250                                 enum v3d_flush_cond flush_cond,
251                                 bool is_compute_pipeline)
252 {
253         struct v3d_resource *rsc = v3d_resource(prsc);
254 
255         /* We only need to force the flush on TF writes, which is the only
256          * case where we might skip the flush to use the 'Wait for TF'
257          * command. Here we are flushing for a read, which means that the
258          * caller intends to write to the resource, so we don't care if
259          * there was a previous TF write to it.
260          */
261         v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond,
262                                         is_compute_pipeline);
263 
264         hash_table_foreach(v3d->jobs, entry) {
265                 struct v3d_job *job = entry->data;
266 
267                 if (!_mesa_set_search(job->bos, rsc->bo))
268                         continue;
269 
270                 bool needs_flush;
271                 switch (flush_cond) {
272                 case V3D_FLUSH_NOT_CURRENT_JOB:
273                         needs_flush = !v3d->job || v3d->job != job;
274                         break;
275                 case V3D_FLUSH_ALWAYS:
276                 case V3D_FLUSH_DEFAULT:
277                 default:
278                         needs_flush = true;
279                 }
280 
281                 if (needs_flush) {
282                         MESA_TRACE_FUNC();
283                         v3d_job_submit(v3d, job);
284                 }
285 
286                 /* Reminder: v3d->jobs is safe to keep iterating even
287                  * after deletion of an entry.
288                  */
289                 continue;
290         }
291 }
292 
293 /**
294  * Returns a v3d_job structure for tracking V3D rendering to a particular FBO.
295  *
296  * If we've already started rendering to this FBO, then return the same job,
297  * otherwise make a new one.  If we're beginning rendering to an FBO, make
298  * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
299  * have been flushed.
300  */
301 struct v3d_job *
v3d_get_job(struct v3d_context * v3d,uint32_t nr_cbufs,struct pipe_surface ** cbufs,struct pipe_surface * zsbuf,struct pipe_surface * bbuf)302 v3d_get_job(struct v3d_context *v3d,
303             uint32_t nr_cbufs,
304             struct pipe_surface **cbufs,
305             struct pipe_surface *zsbuf,
306             struct pipe_surface *bbuf)
307 {
308         /* Return the existing job for this FBO if we have one */
309         struct v3d_job_key local_key = {
310                 .cbufs = {
311                         cbufs[0],
312                         cbufs[1],
313                         cbufs[2],
314                         cbufs[3],
315                         cbufs[4],
316                         cbufs[5],
317                         cbufs[6],
318                         cbufs[7],
319                 },
320                 .zsbuf = zsbuf,
321                 .bbuf = bbuf,
322         };
323         struct hash_entry *entry = _mesa_hash_table_search(v3d->jobs,
324                                                            &local_key);
325         if (entry)
326                 return entry->data;
327 
328         /* Creating a new job.  Make sure that any previous jobs reading or
329          * writing these buffers are flushed.
330          */
331         struct v3d_job *job = v3d_job_create(v3d);
332         job->nr_cbufs = nr_cbufs;
333 
334         for (int i = 0; i < job->nr_cbufs; i++) {
335                 if (cbufs[i]) {
336                         v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,
337                                                         V3D_FLUSH_DEFAULT,
338                                                         false);
339                         pipe_surface_reference(&job->cbufs[i], cbufs[i]);
340 
341                         if (cbufs[i]->texture->nr_samples > 1)
342                                 job->msaa = true;
343                 }
344         }
345         if (zsbuf) {
346                 v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,
347                                                 V3D_FLUSH_DEFAULT,
348                                                 false);
349                 pipe_surface_reference(&job->zsbuf, zsbuf);
350                 if (zsbuf->texture->nr_samples > 1)
351                         job->msaa = true;
352         }
353         if (bbuf) {
354                 pipe_surface_reference(&job->bbuf, bbuf);
355                 if (bbuf->texture->nr_samples > 1)
356                         job->msaa = true;
357         }
358 
359         for (int i = 0; i < job->nr_cbufs; i++) {
360                 if (cbufs[i])
361                         _mesa_hash_table_insert(v3d->write_jobs,
362                                                 cbufs[i]->texture, job);
363         }
364         if (zsbuf) {
365                 _mesa_hash_table_insert(v3d->write_jobs, zsbuf->texture, job);
366 
367                 struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
368                 if (rsc->separate_stencil) {
369                         v3d_flush_jobs_reading_resource(v3d,
370                                                         &rsc->separate_stencil->base,
371                                                         V3D_FLUSH_DEFAULT,
372                                                         false);
373                         _mesa_hash_table_insert(v3d->write_jobs,
374                                                 &rsc->separate_stencil->base,
375                                                 job);
376                 }
377         }
378 
379         /* By default we disable double buffer but we allow it to be enabled
380          * later on (except for msaa) if we don't find any other reason
381          * to disable it.
382          */
383         job->can_use_double_buffer = !job->msaa && V3D_DBG(DOUBLE_BUFFER);
384         job->double_buffer = false;
385 
386         memcpy(&job->key, &local_key, sizeof(local_key));
387         _mesa_hash_table_insert(v3d->jobs, &job->key, job);
388 
389         return job;
390 }
391 
392 struct v3d_job *
v3d_get_job_for_fbo(struct v3d_context * v3d)393 v3d_get_job_for_fbo(struct v3d_context *v3d)
394 {
395         if (v3d->job)
396                 return v3d->job;
397 
398         uint32_t nr_cbufs = v3d->framebuffer.nr_cbufs;
399         struct pipe_surface **cbufs = v3d->framebuffer.cbufs;
400         struct pipe_surface *zsbuf = v3d->framebuffer.zsbuf;
401         struct v3d_job *job = v3d_get_job(v3d, nr_cbufs, cbufs, zsbuf, NULL);
402 
403         if (v3d->framebuffer.samples >= 1) {
404                 job->msaa = true;
405                 job->double_buffer = false;
406         }
407 
408         v3d_get_tile_buffer_size(&v3d->screen->devinfo,
409                                  job->msaa, job->double_buffer,
410                                  job->nr_cbufs, job->cbufs, job->bbuf,
411                                  &job->tile_desc.width,
412                                  &job->tile_desc.height,
413                                  &job->internal_bpp);
414 
415         /* The dirty flags are tracking what's been updated while v3d->job has
416          * been bound, so set them all to ~0 when switching between jobs.  We
417          * also need to reset all state at the start of rendering.
418          */
419         v3d->dirty = ~0;
420 
421         /* If we're binding to uninitialized buffers, no need to load their
422          * contents before drawing.
423          */
424         for (int i = 0; i < nr_cbufs; i++) {
425                 if (cbufs[i]) {
426                         struct v3d_resource *rsc = v3d_resource(cbufs[i]->texture);
427                         if (!rsc->writes)
428                                 job->clear_tlb |= PIPE_CLEAR_COLOR0 << i;
429                         if (rsc->invalidated) {
430                                 job->invalidated_load |= PIPE_CLEAR_COLOR0 << i;
431                                 rsc->invalidated = false;
432                         }
433                 }
434         }
435 
436         if (zsbuf) {
437                 struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
438                 if (!rsc->writes)
439                         job->clear_tlb |= PIPE_CLEAR_DEPTH;
440 
441                 if (rsc->separate_stencil)
442                         rsc = rsc->separate_stencil;
443 
444                 if (!rsc->writes)
445                         job->clear_tlb |= PIPE_CLEAR_STENCIL;
446                 if (rsc->invalidated) {
447                         /* Currently gallium only applies invalidates if it
448                          * affects both depth and stencil together.
449                          */
450                          job->invalidated_load |=
451                                  PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
452                         rsc->invalidated = false;
453                         if (rsc->separate_stencil)
454                                 rsc->separate_stencil->invalidated = false;
455                 }
456         }
457 
458         job->tile_desc.draw_x = DIV_ROUND_UP(v3d->framebuffer.width,
459                                              job->tile_desc.width);
460         job->tile_desc.draw_y = DIV_ROUND_UP(v3d->framebuffer.height,
461                                              job->tile_desc.height);
462 
463         v3d->job = job;
464 
465         return job;
466 }
467 
468 static void
v3d_clif_dump(struct v3d_context * v3d,struct v3d_job * job)469 v3d_clif_dump(struct v3d_context *v3d, struct v3d_job *job)
470 {
471         if (!(V3D_DBG(CL) ||
472               V3D_DBG(CL_NO_BIN) ||
473               V3D_DBG(CLIF)))
474                 return;
475 
476         struct clif_dump *clif = clif_dump_init(&v3d->screen->devinfo,
477                                                 stderr,
478                                                 V3D_DBG(CL) ||
479                                                 V3D_DBG(CL_NO_BIN),
480                                                 V3D_DBG(CL_NO_BIN));
481 
482         set_foreach(job->bos, entry) {
483                 struct v3d_bo *bo = (void *)entry->key;
484                 char *name = ralloc_asprintf(NULL, "%s_0x%x",
485                                              bo->name, bo->offset);
486 
487                 v3d_bo_map(bo);
488                 clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);
489 
490                 ralloc_free(name);
491         }
492 
493         clif_dump(clif, &job->submit);
494 
495         clif_dump_destroy(clif);
496 }
497 
498 static void
v3d_read_and_accumulate_primitive_counters(struct v3d_context * v3d)499 v3d_read_and_accumulate_primitive_counters(struct v3d_context *v3d)
500 {
501         assert(v3d->prim_counts);
502 
503         perf_debug("stalling on TF counts readback\n");
504         struct v3d_resource *rsc = v3d_resource(v3d->prim_counts);
505         if (v3d_bo_wait(rsc->bo, OS_TIMEOUT_INFINITE, "prim-counts")) {
506                 uint32_t *map = v3d_bo_map(rsc->bo) + v3d->prim_counts_offset;
507                 v3d->tf_prims_generated += map[V3D_PRIM_COUNTS_TF_WRITTEN];
508                 /* When we only have a vertex shader with no primitive
509                  * restart, we determine the primitive count in the CPU so
510                  * don't update it here again.
511                  */
512                 if (v3d->prog.gs || v3d->prim_restart) {
513                         v3d->prims_generated += map[V3D_PRIM_COUNTS_WRITTEN];
514                         uint8_t prim_mode =
515                                 v3d->prog.gs ? v3d->prog.gs->prog_data.gs->out_prim_type
516                                              : v3d->prim_mode;
517                         uint32_t vertices_written =
518                                 map[V3D_PRIM_COUNTS_TF_WRITTEN] * mesa_vertices_per_prim(prim_mode);
519                         for (int i = 0; i < v3d->streamout.num_targets; i++) {
520                                 v3d_stream_output_target(v3d->streamout.targets[i])->offset +=
521                                         vertices_written;
522                         }
523                 }
524         }
525 }
526 
527 static void
alloc_tile_state(struct v3d_job * job)528 alloc_tile_state(struct v3d_job *job)
529 {
530         assert(!job->tile_alloc && !job->tile_state);
531 
532         /* The PTB will request the tile alloc initial size per tile at start
533          * of tile binning.
534          */
535         uint32_t tile_alloc_size =
536                 MAX2(job->num_layers, 1) * job->tile_desc.draw_x *
537                 job->tile_desc.draw_y * 64;
538 
539         /* The PTB allocates in aligned 4k chunks after the initial setup. */
540         tile_alloc_size = align(tile_alloc_size, 4096);
541 
542         /* Include the first two chunk allocations that the PTB does so that
543          * we definitely clear the OOM condition before triggering one (the HW
544          * won't trigger OOM during the first allocations).
545          */
546         tile_alloc_size += 8192;
547 
548         /* For performance, allocate some extra initial memory after the PTB's
549          * minimal allocations, so that we hopefully don't have to block the
550          * GPU on the kernel handling an OOM signal.
551          */
552         tile_alloc_size += 512 * 1024;
553 
554         job->tile_alloc = v3d_bo_alloc(job->v3d->screen, tile_alloc_size,
555                                        "tile_alloc");
556         uint32_t tsda_per_tile_size = 256;
557         job->tile_state = v3d_bo_alloc(job->v3d->screen,
558                                        MAX2(job->num_layers, 1) *
559                                        job->tile_desc.draw_y *
560                                        job->tile_desc.draw_x *
561                                        tsda_per_tile_size,
562                                        "TSDA");
563 }
564 
565 static void
enable_double_buffer_mode(struct v3d_job * job)566 enable_double_buffer_mode(struct v3d_job *job)
567 {
568         /* Don't enable if we have seen incompatibilities */
569         if (!job->can_use_double_buffer)
570                 return;
571 
572          /* For now we only allow double buffer via envvar and only for jobs
573           * that are not MSAA, which is incompatible.
574           */
575         assert(V3D_DBG(DOUBLE_BUFFER) && !job->msaa);
576 
577         /* Tile loads are serialized against stores, in which case we don't get
578          * any benefits from enabling double-buffer and would just pay the price
579          * of a smaller tile size instead. Similarly, we only benefit from
580          * double-buffer if we have tile stores, as the point of this mode is
581          * to execute rendering of a new tile while we store the previous one to
582          * hide latency on the tile store operation.
583          */
584         if (job->load)
585                 return;
586 
587         if (!job->store)
588                return;
589 
590         if (!v3d_double_buffer_score_ok(&job->double_buffer_score))
591               return;
592 
593         /* Enable double-buffer mode.
594          *
595          * This will reduce the tile size so we need to recompute state
596          * that depends on this and rewrite the TILE_BINNING_MODE_CFG
597          * we emitted earlier in the CL.
598          */
599         job->double_buffer = true;
600         v3d_get_tile_buffer_size(&job->v3d->screen->devinfo,
601                                  job->msaa, job->double_buffer,
602                                  job->nr_cbufs, job->cbufs, job->bbuf,
603                                  &job->tile_desc.width, &job->tile_desc.height,
604                                  &job->internal_bpp);
605 
606         job->tile_desc.draw_x = DIV_ROUND_UP(job->draw_width,
607                                              job->tile_desc.width);
608         job->tile_desc.draw_y = DIV_ROUND_UP(job->draw_height,
609                                              job->tile_desc.height);
610 
611         struct v3d_device_info *devinfo = &job->v3d->screen->devinfo;
612         v3d_X(devinfo, job_emit_enable_double_buffer)(job);
613 }
614 
615 /**
616  * Submits the job to the kernel and then reinitializes it.
617  */
618 void
v3d_job_submit(struct v3d_context * v3d,struct v3d_job * job)619 v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)
620 {
621         struct v3d_screen *screen = v3d->screen;
622         struct v3d_device_info *devinfo = &screen->devinfo;
623 
624         MESA_TRACE_FUNC();
625 
626         if (!job->needs_flush)
627                 goto done;
628 
629         /* The GL_PRIMITIVES_GENERATED query is included with
630          * OES_geometry_shader.
631          */
632         job->needs_primitives_generated =
633                 v3d->n_primitives_generated_queries_in_flight > 0 &&
634                 v3d->prog.gs;
635 
636         if (job->needs_primitives_generated)
637                 v3d_ensure_prim_counts_allocated(v3d);
638 
639         enable_double_buffer_mode(job);
640 
641         alloc_tile_state(job);
642 
643         v3d_X(devinfo, emit_rcl)(job);
644 
645         if (cl_offset(&job->bcl) > 0)
646                 v3d_X(devinfo, bcl_epilogue)(v3d, job);
647 
648         if (v3d->in_fence_fd >= 0) {
649                 /* pipe_caps.native_fence */
650                 if (drmSyncobjImportSyncFile(v3d->fd, v3d->in_syncobj,
651                                              v3d->in_fence_fd)) {
652                    fprintf(stderr, "Failed to import native fence.\n");
653                 } else {
654                    job->submit.in_sync_bcl = v3d->in_syncobj;
655                 }
656                 close(v3d->in_fence_fd);
657                 v3d->in_fence_fd = -1;
658         } else {
659                 /* While the RCL will implicitly depend on the last RCL to have
660                  * finished, we also need to block on any previous TFU job we
661                  * may have dispatched.
662                  */
663                 job->submit.in_sync_rcl = v3d->out_sync;
664         }
665 
666         /* Update the sync object for the last rendering by our context. */
667         job->submit.out_sync = v3d->out_sync;
668 
669         job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
670         job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);
671 
672         if (v3d->active_perfmon) {
673                 assert(screen->has_perfmon);
674                 job->submit.perfmon_id = v3d->active_perfmon->kperfmon_id;
675         }
676 
677         /* If we are submitting a job with a different perfmon, we need to
678          * ensure the previous one fully finishes before starting this;
679          * otherwise it would wrongly mix counter results.
680          */
681         if (v3d->active_perfmon != v3d->last_perfmon) {
682                 v3d->last_perfmon = v3d->active_perfmon;
683                 job->submit.in_sync_bcl = v3d->out_sync;
684         }
685 
686         job->submit.flags = 0;
687         if (job->tmu_dirty_rcl && screen->has_cache_flush)
688                 job->submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;
689 
690         /* On V3D 4.1, the tile alloc/state setup moved to register writes
691          * instead of binner packets.
692          */
693         if (devinfo->ver >= 42) {
694                 v3d_job_add_bo(job, job->tile_alloc);
695                 job->submit.qma = job->tile_alloc->offset;
696                 job->submit.qms = job->tile_alloc->size;
697 
698                 v3d_job_add_bo(job, job->tile_state);
699                 job->submit.qts = job->tile_state->offset;
700         }
701 
702         v3d_clif_dump(v3d, job);
703 
704         if (!V3D_DBG(NORAST)) {
705                 int ret;
706                 ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit);
707                 static bool warned = false;
708                 if (ret && !warned) {
709                         fprintf(stderr, "Draw call returned %s.  "
710                                         "Expect corruption.\n", strerror(errno));
711                         warned = true;
712                 } else if (!ret) {
713                         if (v3d->active_perfmon)
714                                 v3d->active_perfmon->job_submitted = true;
715                         if (V3D_DBG(SYNC)) {
716                                 drmSyncobjWait(v3d->fd, &v3d->out_sync, 1, INT64_MAX,
717                                                DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
718                         }
719                 }
720 
721                 /* If we are submitting a job in the middle of transform
722                  * feedback or there is a primitives generated query with a
723                  * geometry shader then we need to read the primitive counts
724                  * and accumulate them, otherwise they will be reset at the
725                  * start of the next draw when we emit the Tile Binning Mode
726                  * Configuration packet.
727                  *
728                  * If the job doesn't have any TF draw calls, then we know
729                  * the primitive count must be zero and we can skip stalling
730                  * for this. This also fixes a problem because it seems that
731                  * in this scenario the counters are not reset with the Tile
732                  * Binning Mode Configuration packet, which would translate
733                  * to us reading an obsolete (possibly non-zero) value from
734                  * the GPU counters.
735                  */
736                 if (job->needs_primitives_generated ||
737                     (v3d->streamout.num_targets &&
738                      job->tf_draw_calls_queued > 0))
739                         v3d_read_and_accumulate_primitive_counters(v3d);
740         }
741 
742 done:
743         v3d_job_free(v3d, job);
744 }
745 
746 DERIVE_HASH_TABLE(v3d_job_key);
747 
748 void
v3d_job_init(struct v3d_context * v3d)749 v3d_job_init(struct v3d_context *v3d)
750 {
751         v3d->jobs = v3d_job_key_table_create(v3d);
752         v3d->write_jobs = _mesa_hash_table_create(v3d,
753                                                   _mesa_hash_pointer,
754                                                   _mesa_key_pointer_equal);
755 }
756 
757