• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2014-2015 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /** @file vc4_job.c
25  *
26  * Functions for submitting VC4 render jobs to the kernel.
27  */
28 
29 #include <xf86drm.h>
30 #include "vc4_cl_dump.h"
31 #include "vc4_context.h"
32 #include "util/hash_table.h"
33 #include "util/perf/cpu_trace.h"
34 
35 static void
vc4_job_free(struct vc4_context * vc4,struct vc4_job * job)36 vc4_job_free(struct vc4_context *vc4, struct vc4_job *job)
37 {
38         struct vc4_bo **referenced_bos = job->bo_pointers.base;
39         for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
40                 vc4_bo_unreference(&referenced_bos[i]);
41         }
42 
43         _mesa_hash_table_remove_key(vc4->jobs, &job->key);
44 
45         if (job->color_write) {
46                 _mesa_hash_table_remove_key(vc4->write_jobs,
47                                             job->color_write->texture);
48                 pipe_surface_reference(&job->color_write, NULL);
49         }
50         if (job->msaa_color_write) {
51                 _mesa_hash_table_remove_key(vc4->write_jobs,
52                                             job->msaa_color_write->texture);
53                 pipe_surface_reference(&job->msaa_color_write, NULL);
54         }
55         if (job->zs_write) {
56                 _mesa_hash_table_remove_key(vc4->write_jobs,
57                                             job->zs_write->texture);
58                 pipe_surface_reference(&job->zs_write, NULL);
59         }
60         if (job->msaa_zs_write) {
61                 _mesa_hash_table_remove_key(vc4->write_jobs,
62                                             job->msaa_zs_write->texture);
63                 pipe_surface_reference(&job->msaa_zs_write, NULL);
64         }
65 
66         pipe_surface_reference(&job->color_read, NULL);
67         pipe_surface_reference(&job->zs_read, NULL);
68 
69         if (vc4->job == job)
70                 vc4->job = NULL;
71 
72         ralloc_free(job);
73 }
74 
75 static struct vc4_job *
vc4_job_create(struct vc4_context * vc4)76 vc4_job_create(struct vc4_context *vc4)
77 {
78         struct vc4_job *job = rzalloc(vc4, struct vc4_job);
79 
80         vc4_init_cl(job, &job->bcl);
81         vc4_init_cl(job, &job->shader_rec);
82         vc4_init_cl(job, &job->uniforms);
83         vc4_init_cl(job, &job->bo_handles);
84         vc4_init_cl(job, &job->bo_pointers);
85 
86         job->draw_min_x = ~0;
87         job->draw_min_y = ~0;
88         job->draw_max_x = 0;
89         job->draw_max_y = 0;
90 
91         job->last_gem_handle_hindex = ~0;
92 
93         if (vc4->perfmon)
94                 job->perfmon = vc4->perfmon;
95 
96         return job;
97 }
98 
99 void
vc4_flush_jobs_writing_resource(struct vc4_context * vc4,struct pipe_resource * prsc)100 vc4_flush_jobs_writing_resource(struct vc4_context *vc4,
101                                 struct pipe_resource *prsc)
102 {
103         struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs,
104                                                            prsc);
105         if (entry) {
106                 struct vc4_job *job = entry->data;
107 
108                 MESA_TRACE_FUNC();
109 
110                 vc4_job_submit(vc4, job);
111         }
112 }
113 
114 void
vc4_flush_jobs_reading_resource(struct vc4_context * vc4,struct pipe_resource * prsc)115 vc4_flush_jobs_reading_resource(struct vc4_context *vc4,
116                                 struct pipe_resource *prsc)
117 {
118         struct vc4_resource *rsc = vc4_resource(prsc);
119 
120         MESA_TRACE_FUNC();
121 
122         vc4_flush_jobs_writing_resource(vc4, prsc);
123 
124         hash_table_foreach(vc4->jobs, entry) {
125                 struct vc4_job *job = entry->data;
126 
127                 struct vc4_bo **referenced_bos = job->bo_pointers.base;
128                 bool found = false;
129                 for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
130                         if (referenced_bos[i] == rsc->bo) {
131                                 found = true;
132                                 break;
133                         }
134                 }
135                 if (found) {
136                         vc4_job_submit(vc4, job);
137                         continue;
138                 }
139 
140                 /* Also check for the Z/color buffers, since the references to
141                  * those are only added immediately before submit.
142                  */
143                 if (job->color_read && !(job->cleared & PIPE_CLEAR_COLOR)) {
144                         struct vc4_resource *ctex =
145                                 vc4_resource(job->color_read->texture);
146                         if (ctex->bo == rsc->bo) {
147                                 vc4_job_submit(vc4, job);
148                                 continue;
149                         }
150                 }
151 
152                 if (job->zs_read && !(job->cleared &
153                                       (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
154                         struct vc4_resource *ztex =
155                                 vc4_resource(job->zs_read->texture);
156                         if (ztex->bo == rsc->bo) {
157                                 vc4_job_submit(vc4, job);
158                                 continue;
159                         }
160                 }
161         }
162 }
163 
164 /**
165  * Returns a vc4_job structure for tracking V3D rendering to a particular FBO.
166  *
167  * If we've already started rendering to this FBO, then return old same job,
168  * otherwise make a new one.  If we're beginning rendering to an FBO, make
169  * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
170  * have been flushed.
171  */
172 struct vc4_job *
vc4_get_job(struct vc4_context * vc4,struct pipe_surface * cbuf,struct pipe_surface * zsbuf)173 vc4_get_job(struct vc4_context *vc4,
174             struct pipe_surface *cbuf, struct pipe_surface *zsbuf)
175 {
176         /* Return the existing job for this FBO if we have one */
177         struct vc4_job_key local_key = {.cbuf = cbuf, .zsbuf = zsbuf};
178         struct hash_entry *entry = _mesa_hash_table_search(vc4->jobs,
179                                                            &local_key);
180         if (entry)
181                 return entry->data;
182 
183         /* Creating a new job.  Make sure that any previous jobs reading or
184          * writing these buffers are flushed.
185          */
186         if (cbuf)
187                 vc4_flush_jobs_reading_resource(vc4, cbuf->texture);
188         if (zsbuf)
189                 vc4_flush_jobs_reading_resource(vc4, zsbuf->texture);
190 
191         struct vc4_job *job = vc4_job_create(vc4);
192 
193         if (cbuf) {
194                 if (cbuf->texture->nr_samples > 1) {
195                         job->msaa = true;
196                         pipe_surface_reference(&job->msaa_color_write, cbuf);
197                 } else {
198                         pipe_surface_reference(&job->color_write, cbuf);
199                 }
200         }
201 
202         if (zsbuf) {
203                 if (zsbuf->texture->nr_samples > 1) {
204                         job->msaa = true;
205                         pipe_surface_reference(&job->msaa_zs_write, zsbuf);
206                 } else {
207                         pipe_surface_reference(&job->zs_write, zsbuf);
208                 }
209         }
210 
211         if (job->msaa) {
212                 job->tile_width = 32;
213                 job->tile_height = 32;
214         } else {
215                 job->tile_width = 64;
216                 job->tile_height = 64;
217         }
218 
219         if (cbuf)
220                 _mesa_hash_table_insert(vc4->write_jobs, cbuf->texture, job);
221         if (zsbuf)
222                 _mesa_hash_table_insert(vc4->write_jobs, zsbuf->texture, job);
223 
224         job->key.cbuf = cbuf;
225         job->key.zsbuf = zsbuf;
226         _mesa_hash_table_insert(vc4->jobs, &job->key, job);
227 
228         return job;
229 }
230 
231 struct vc4_job *
vc4_get_job_for_fbo(struct vc4_context * vc4)232 vc4_get_job_for_fbo(struct vc4_context *vc4)
233 {
234         if (vc4->job)
235                 return vc4->job;
236 
237         struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];
238         struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;
239         struct vc4_job *job = vc4_get_job(vc4, cbuf, zsbuf);
240 
241         /* The dirty flags are tracking what's been updated while vc4->job has
242          * been bound, so set them all to ~0 when switching between jobs.  We
243          * also need to reset all state at the start of rendering.
244          */
245         vc4->dirty = ~0;
246 
247         /* Set up the read surfaces in the job.  If they aren't actually
248          * getting read (due to a clear starting the frame), job->cleared will
249          * mask out the read.
250          */
251         pipe_surface_reference(&job->color_read, cbuf);
252         pipe_surface_reference(&job->zs_read, zsbuf);
253 
254         /* If we're binding to uninitialized buffers, no need to load their
255          * contents before drawing.
256          */
257         if (cbuf) {
258                 struct vc4_resource *rsc = vc4_resource(cbuf->texture);
259                 if (!rsc->writes)
260                         job->cleared |= PIPE_CLEAR_COLOR0;
261         }
262 
263         if (zsbuf) {
264                 struct vc4_resource *rsc = vc4_resource(zsbuf->texture);
265                 if (!rsc->writes)
266                         job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
267         }
268 
269         job->draw_tiles_x = DIV_ROUND_UP(vc4->framebuffer.width,
270                                          job->tile_width);
271         job->draw_tiles_y = DIV_ROUND_UP(vc4->framebuffer.height,
272                                          job->tile_height);
273 
274         /* Initialize the job with the raster order flags -- each draw will
275          * check that we haven't changed the flags, since that requires a
276          * flush.
277          */
278         if (vc4->rasterizer)
279                 job->flags = vc4->rasterizer->tile_raster_order_flags;
280 
281         vc4->job = job;
282 
283         return job;
284 }
285 
286 static void
vc4_submit_setup_rcl_surface(struct vc4_job * job,struct drm_vc4_submit_rcl_surface * submit_surf,struct pipe_surface * psurf,bool is_depth,bool is_write)287 vc4_submit_setup_rcl_surface(struct vc4_job *job,
288                              struct drm_vc4_submit_rcl_surface *submit_surf,
289                              struct pipe_surface *psurf,
290                              bool is_depth, bool is_write)
291 {
292         struct vc4_surface *surf = vc4_surface(psurf);
293 
294         if (!surf)
295                 return;
296 
297         struct vc4_resource *rsc = vc4_resource(psurf->texture);
298         submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
299         submit_surf->offset = surf->offset;
300 
301         if (psurf->texture->nr_samples <= 1) {
302                 if (is_depth) {
303                         submit_surf->bits =
304                                 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
305                                               VC4_LOADSTORE_TILE_BUFFER_BUFFER);
306 
307                 } else {
308                         submit_surf->bits =
309                                 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
310                                               VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
311                                 VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
312                                               VC4_LOADSTORE_TILE_BUFFER_BGR565 :
313                                               VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
314                                               VC4_LOADSTORE_TILE_BUFFER_FORMAT);
315                 }
316                 submit_surf->bits |=
317                         VC4_SET_FIELD(surf->tiling,
318                                       VC4_LOADSTORE_TILE_BUFFER_TILING);
319         } else {
320                 assert(!is_write);
321                 submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES;
322         }
323 
324         if (is_write)
325                 rsc->writes++;
326 }
327 
328 static void
vc4_submit_setup_rcl_render_config_surface(struct vc4_job * job,struct drm_vc4_submit_rcl_surface * submit_surf,struct pipe_surface * psurf)329 vc4_submit_setup_rcl_render_config_surface(struct vc4_job *job,
330                                            struct drm_vc4_submit_rcl_surface *submit_surf,
331                                            struct pipe_surface *psurf)
332 {
333         struct vc4_surface *surf = vc4_surface(psurf);
334 
335         if (!surf)
336                 return;
337 
338         struct vc4_resource *rsc = vc4_resource(psurf->texture);
339         submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
340         submit_surf->offset = surf->offset;
341 
342         if (psurf->texture->nr_samples <= 1) {
343                 submit_surf->bits =
344                         VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
345                                       VC4_RENDER_CONFIG_FORMAT_BGR565 :
346                                       VC4_RENDER_CONFIG_FORMAT_RGBA8888,
347                                       VC4_RENDER_CONFIG_FORMAT) |
348                         VC4_SET_FIELD(surf->tiling,
349                                       VC4_RENDER_CONFIG_MEMORY_FORMAT);
350         }
351 
352         rsc->writes++;
353 }
354 
355 static void
vc4_submit_setup_rcl_msaa_surface(struct vc4_job * job,struct drm_vc4_submit_rcl_surface * submit_surf,struct pipe_surface * psurf)356 vc4_submit_setup_rcl_msaa_surface(struct vc4_job *job,
357                                   struct drm_vc4_submit_rcl_surface *submit_surf,
358                                   struct pipe_surface *psurf)
359 {
360         struct vc4_surface *surf = vc4_surface(psurf);
361 
362         if (!surf)
363                 return;
364 
365         struct vc4_resource *rsc = vc4_resource(psurf->texture);
366         submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
367         submit_surf->offset = surf->offset;
368         submit_surf->bits = 0;
369         rsc->writes++;
370 }
371 
372 /**
373  * Submits the job to the kernel and then reinitializes it.
374  */
375 void
vc4_job_submit(struct vc4_context * vc4,struct vc4_job * job)376 vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
377 {
378         MESA_TRACE_FUNC();
379 
380         if (!job->needs_flush)
381                 goto done;
382 
383         /* The RCL setup would choke if the draw bounds cause no drawing, so
384          * just drop the drawing if that's the case.
385          */
386         if (job->draw_max_x <= job->draw_min_x ||
387             job->draw_max_y <= job->draw_min_y) {
388                 goto done;
389         }
390 
391         if (VC4_DBG(CL)) {
392                 fprintf(stderr, "BCL:\n");
393                 vc4_dump_cl(job->bcl.base, cl_offset(&job->bcl), false);
394         }
395 
396         if (cl_offset(&job->bcl) > 0) {
397                 /* Increment the semaphore indicating that binning is done and
398                  * unblocking the render thread.  Note that this doesn't act
399                  * until the FLUSH completes.
400                  */
401                 cl_ensure_space(&job->bcl, 8);
402                 cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
403                 /* The FLUSH caps all of our bin lists with a
404                  * VC4_PACKET_RETURN.
405                  */
406                 cl_emit(&job->bcl, FLUSH, flush);
407         }
408         struct drm_vc4_submit_cl submit = {
409                 .color_read.hindex = ~0,
410                 .zs_read.hindex = ~0,
411                 .color_write.hindex = ~0,
412                 .msaa_color_write.hindex = ~0,
413                 .zs_write.hindex = ~0,
414                 .msaa_zs_write.hindex = ~0,
415         };
416 
417         cl_ensure_space(&job->bo_handles, 6 * sizeof(uint32_t));
418         cl_ensure_space(&job->bo_pointers, 6 * sizeof(struct vc4_bo *));
419 
420         if (job->resolve & PIPE_CLEAR_COLOR) {
421                 if (!(job->cleared & PIPE_CLEAR_COLOR)) {
422                         vc4_submit_setup_rcl_surface(job, &submit.color_read,
423                                                      job->color_read,
424                                                      false, false);
425                 }
426                 vc4_submit_setup_rcl_render_config_surface(job,
427                                                            &submit.color_write,
428                                                            job->color_write);
429                 vc4_submit_setup_rcl_msaa_surface(job,
430                                                   &submit.msaa_color_write,
431                                                   job->msaa_color_write);
432         }
433         if (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
434                 if (!(job->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
435                         vc4_submit_setup_rcl_surface(job, &submit.zs_read,
436                                                      job->zs_read, true, false);
437                 }
438                 vc4_submit_setup_rcl_surface(job, &submit.zs_write,
439                                              job->zs_write, true, true);
440                 vc4_submit_setup_rcl_msaa_surface(job, &submit.msaa_zs_write,
441                                                   job->msaa_zs_write);
442         }
443 
444         if (job->msaa) {
445                 /* This bit controls how many pixels the general
446                  * (i.e. subsampled) loads/stores are iterating over
447                  * (multisample loads replicate out to the other samples).
448                  */
449                 submit.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X;
450                 /* Controls whether color_write's
451                  * VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation
452                  */
453                 submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X;
454         }
455 
456         submit.bo_handles = (uintptr_t)job->bo_handles.base;
457         submit.bo_handle_count = cl_offset(&job->bo_handles) / 4;
458         submit.bin_cl = (uintptr_t)job->bcl.base;
459         submit.bin_cl_size = cl_offset(&job->bcl);
460         submit.shader_rec = (uintptr_t)job->shader_rec.base;
461         submit.shader_rec_size = cl_offset(&job->shader_rec);
462         submit.shader_rec_count = job->shader_rec_count;
463         submit.uniforms = (uintptr_t)job->uniforms.base;
464         submit.uniforms_size = cl_offset(&job->uniforms);
465 	if (job->perfmon)
466 		submit.perfmonid = job->perfmon->id;
467 
468         assert(job->draw_min_x != ~0 && job->draw_min_y != ~0);
469         submit.min_x_tile = job->draw_min_x / job->tile_width;
470         submit.min_y_tile = job->draw_min_y / job->tile_height;
471         submit.max_x_tile = (job->draw_max_x - 1) / job->tile_width;
472         submit.max_y_tile = (job->draw_max_y - 1) / job->tile_height;
473         submit.width = job->draw_width;
474         submit.height = job->draw_height;
475         if (job->cleared) {
476                 submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
477                 submit.clear_color[0] = job->clear_color[0];
478                 submit.clear_color[1] = job->clear_color[1];
479                 submit.clear_z = job->clear_depth;
480                 submit.clear_s = job->clear_stencil;
481         }
482         submit.flags |= job->flags;
483 
484         if (vc4->screen->has_syncobj) {
485                 submit.out_sync = vc4->job_syncobj;
486 
487                 if (vc4->in_fence_fd >= 0) {
488                         /* This replaces the fence in the syncobj. */
489                         drmSyncobjImportSyncFile(vc4->fd, vc4->in_syncobj,
490                                                  vc4->in_fence_fd);
491                         submit.in_sync = vc4->in_syncobj;
492                         close(vc4->in_fence_fd);
493                         vc4->in_fence_fd = -1;
494                 }
495         }
496 
497         if (!VC4_DBG(NORAST)) {
498                 int ret;
499 
500                 ret = vc4_ioctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
501                 static bool warned = false;
502                 if (ret && !warned) {
503                         fprintf(stderr, "Draw call returned %s.  "
504                                         "Expect corruption.\n", strerror(errno));
505                         warned = true;
506                 } else if (!ret) {
507                         vc4->last_emit_seqno = submit.seqno;
508                         if (job->perfmon)
509                                 job->perfmon->last_seqno = submit.seqno;
510                 }
511         }
512 
513         if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) {
514                 if (!vc4_wait_seqno(vc4->screen,
515                                     vc4->last_emit_seqno - 5,
516                                     OS_TIMEOUT_INFINITE,
517                                     "job throttling")) {
518                         fprintf(stderr, "Job throttling failed\n");
519                 }
520         }
521 
522         if (VC4_DBG(ALWAYS_SYNC)) {
523                 if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
524                                     OS_TIMEOUT_INFINITE, "sync")) {
525                         fprintf(stderr, "Wait failed.\n");
526                         abort();
527                 }
528         }
529 
530 done:
531         vc4_job_free(vc4, job);
532 }
533 
534 static bool
vc4_job_compare(const void * a,const void * b)535 vc4_job_compare(const void *a, const void *b)
536 {
537         return memcmp(a, b, sizeof(struct vc4_job_key)) == 0;
538 }
539 
540 static uint32_t
vc4_job_hash(const void * key)541 vc4_job_hash(const void *key)
542 {
543         return _mesa_hash_data(key, sizeof(struct vc4_job_key));
544 }
545 
546 int
vc4_job_init(struct vc4_context * vc4)547 vc4_job_init(struct vc4_context *vc4)
548 {
549         vc4->jobs = _mesa_hash_table_create(vc4,
550                                             vc4_job_hash,
551                                             vc4_job_compare);
552         vc4->write_jobs = _mesa_hash_table_create(vc4,
553                                                   _mesa_hash_pointer,
554                                                   _mesa_key_pointer_equal);
555 
556         if (vc4->screen->has_syncobj) {
557                 /* Create the syncobj as signaled since with no job executed
558                  * there is nothing to wait on.
559                  */
560                 int ret = drmSyncobjCreate(vc4->fd,
561                                            DRM_SYNCOBJ_CREATE_SIGNALED,
562                                            &vc4->job_syncobj);
563                 if (ret) {
564                         /* If the screen indicated syncobj support, we should
565                          * be able to create a signaled syncobj.
566                          * At this point it is too late to pretend the screen
567                          * has no syncobj support.
568                          */
569                         return ret;
570                 }
571         }
572 
573         return 0;
574 }
575 
576