• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2014-2015 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /** @file vc4_job.c
25  *
26  * Functions for submitting VC4 render jobs to the kernel.
27  */
28 
29 #include <xf86drm.h>
30 #include "vc4_cl_dump.h"
31 #include "vc4_context.h"
32 #include "util/hash_table.h"
33 
34 static void
vc4_job_free(struct vc4_context * vc4,struct vc4_job * job)35 vc4_job_free(struct vc4_context *vc4, struct vc4_job *job)
36 {
37         struct vc4_bo **referenced_bos = job->bo_pointers.base;
38         for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
39                 vc4_bo_unreference(&referenced_bos[i]);
40         }
41 
42         _mesa_hash_table_remove_key(vc4->jobs, &job->key);
43 
44         if (job->color_write) {
45                 _mesa_hash_table_remove_key(vc4->write_jobs,
46                                             job->color_write->texture);
47                 pipe_surface_reference(&job->color_write, NULL);
48         }
49         if (job->msaa_color_write) {
50                 _mesa_hash_table_remove_key(vc4->write_jobs,
51                                             job->msaa_color_write->texture);
52                 pipe_surface_reference(&job->msaa_color_write, NULL);
53         }
54         if (job->zs_write) {
55                 _mesa_hash_table_remove_key(vc4->write_jobs,
56                                             job->zs_write->texture);
57                 pipe_surface_reference(&job->zs_write, NULL);
58         }
59         if (job->msaa_zs_write) {
60                 _mesa_hash_table_remove_key(vc4->write_jobs,
61                                             job->msaa_zs_write->texture);
62                 pipe_surface_reference(&job->msaa_zs_write, NULL);
63         }
64 
65         pipe_surface_reference(&job->color_read, NULL);
66         pipe_surface_reference(&job->zs_read, NULL);
67 
68         if (vc4->job == job)
69                 vc4->job = NULL;
70 
71         ralloc_free(job);
72 }
73 
74 static struct vc4_job *
vc4_job_create(struct vc4_context * vc4)75 vc4_job_create(struct vc4_context *vc4)
76 {
77         struct vc4_job *job = rzalloc(vc4, struct vc4_job);
78 
79         vc4_init_cl(job, &job->bcl);
80         vc4_init_cl(job, &job->shader_rec);
81         vc4_init_cl(job, &job->uniforms);
82         vc4_init_cl(job, &job->bo_handles);
83         vc4_init_cl(job, &job->bo_pointers);
84 
85         job->draw_min_x = ~0;
86         job->draw_min_y = ~0;
87         job->draw_max_x = 0;
88         job->draw_max_y = 0;
89 
90         job->last_gem_handle_hindex = ~0;
91 
92         if (vc4->perfmon)
93                 job->perfmon = vc4->perfmon;
94 
95         return job;
96 }
97 
98 void
vc4_flush_jobs_writing_resource(struct vc4_context * vc4,struct pipe_resource * prsc)99 vc4_flush_jobs_writing_resource(struct vc4_context *vc4,
100                                 struct pipe_resource *prsc)
101 {
102         struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs,
103                                                            prsc);
104         if (entry) {
105                 struct vc4_job *job = entry->data;
106                 vc4_job_submit(vc4, job);
107         }
108 }
109 
110 void
vc4_flush_jobs_reading_resource(struct vc4_context * vc4,struct pipe_resource * prsc)111 vc4_flush_jobs_reading_resource(struct vc4_context *vc4,
112                                 struct pipe_resource *prsc)
113 {
114         struct vc4_resource *rsc = vc4_resource(prsc);
115 
116         vc4_flush_jobs_writing_resource(vc4, prsc);
117 
118         hash_table_foreach(vc4->jobs, entry) {
119                 struct vc4_job *job = entry->data;
120 
121                 struct vc4_bo **referenced_bos = job->bo_pointers.base;
122                 bool found = false;
123                 for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
124                         if (referenced_bos[i] == rsc->bo) {
125                                 found = true;
126                                 break;
127                         }
128                 }
129                 if (found) {
130                         vc4_job_submit(vc4, job);
131                         continue;
132                 }
133 
134                 /* Also check for the Z/color buffers, since the references to
135                  * those are only added immediately before submit.
136                  */
137                 if (job->color_read && !(job->cleared & PIPE_CLEAR_COLOR)) {
138                         struct vc4_resource *ctex =
139                                 vc4_resource(job->color_read->texture);
140                         if (ctex->bo == rsc->bo) {
141                                 vc4_job_submit(vc4, job);
142                                 continue;
143                         }
144                 }
145 
146                 if (job->zs_read && !(job->cleared &
147                                       (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
148                         struct vc4_resource *ztex =
149                                 vc4_resource(job->zs_read->texture);
150                         if (ztex->bo == rsc->bo) {
151                                 vc4_job_submit(vc4, job);
152                                 continue;
153                         }
154                 }
155         }
156 }
157 
158 /**
159  * Returns a vc4_job struture for tracking V3D rendering to a particular FBO.
160  *
161  * If we've already started rendering to this FBO, then return old same job,
162  * otherwise make a new one.  If we're beginning rendering to an FBO, make
163  * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
164  * have been flushed.
165  */
166 struct vc4_job *
vc4_get_job(struct vc4_context * vc4,struct pipe_surface * cbuf,struct pipe_surface * zsbuf)167 vc4_get_job(struct vc4_context *vc4,
168             struct pipe_surface *cbuf, struct pipe_surface *zsbuf)
169 {
170         /* Return the existing job for this FBO if we have one */
171         struct vc4_job_key local_key = {.cbuf = cbuf, .zsbuf = zsbuf};
172         struct hash_entry *entry = _mesa_hash_table_search(vc4->jobs,
173                                                            &local_key);
174         if (entry)
175                 return entry->data;
176 
177         /* Creating a new job.  Make sure that any previous jobs reading or
178          * writing these buffers are flushed.
179          */
180         if (cbuf)
181                 vc4_flush_jobs_reading_resource(vc4, cbuf->texture);
182         if (zsbuf)
183                 vc4_flush_jobs_reading_resource(vc4, zsbuf->texture);
184 
185         struct vc4_job *job = vc4_job_create(vc4);
186 
187         if (cbuf) {
188                 if (cbuf->texture->nr_samples > 1) {
189                         job->msaa = true;
190                         pipe_surface_reference(&job->msaa_color_write, cbuf);
191                 } else {
192                         pipe_surface_reference(&job->color_write, cbuf);
193                 }
194         }
195 
196         if (zsbuf) {
197                 if (zsbuf->texture->nr_samples > 1) {
198                         job->msaa = true;
199                         pipe_surface_reference(&job->msaa_zs_write, zsbuf);
200                 } else {
201                         pipe_surface_reference(&job->zs_write, zsbuf);
202                 }
203         }
204 
205         if (job->msaa) {
206                 job->tile_width = 32;
207                 job->tile_height = 32;
208         } else {
209                 job->tile_width = 64;
210                 job->tile_height = 64;
211         }
212 
213         if (cbuf)
214                 _mesa_hash_table_insert(vc4->write_jobs, cbuf->texture, job);
215         if (zsbuf)
216                 _mesa_hash_table_insert(vc4->write_jobs, zsbuf->texture, job);
217 
218         job->key.cbuf = cbuf;
219         job->key.zsbuf = zsbuf;
220         _mesa_hash_table_insert(vc4->jobs, &job->key, job);
221 
222         return job;
223 }
224 
225 struct vc4_job *
vc4_get_job_for_fbo(struct vc4_context * vc4)226 vc4_get_job_for_fbo(struct vc4_context *vc4)
227 {
228         if (vc4->job)
229                 return vc4->job;
230 
231         struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];
232         struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;
233         struct vc4_job *job = vc4_get_job(vc4, cbuf, zsbuf);
234 
235         /* The dirty flags are tracking what's been updated while vc4->job has
236          * been bound, so set them all to ~0 when switching between jobs.  We
237          * also need to reset all state at the start of rendering.
238          */
239         vc4->dirty = ~0;
240 
241         /* Set up the read surfaces in the job.  If they aren't actually
242          * getting read (due to a clear starting the frame), job->cleared will
243          * mask out the read.
244          */
245         pipe_surface_reference(&job->color_read, cbuf);
246         pipe_surface_reference(&job->zs_read, zsbuf);
247 
248         /* If we're binding to uninitialized buffers, no need to load their
249          * contents before drawing.
250          */
251         if (cbuf) {
252                 struct vc4_resource *rsc = vc4_resource(cbuf->texture);
253                 if (!rsc->writes)
254                         job->cleared |= PIPE_CLEAR_COLOR0;
255         }
256 
257         if (zsbuf) {
258                 struct vc4_resource *rsc = vc4_resource(zsbuf->texture);
259                 if (!rsc->writes)
260                         job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
261         }
262 
263         job->draw_tiles_x = DIV_ROUND_UP(vc4->framebuffer.width,
264                                          job->tile_width);
265         job->draw_tiles_y = DIV_ROUND_UP(vc4->framebuffer.height,
266                                          job->tile_height);
267 
268         /* Initialize the job with the raster order flags -- each draw will
269          * check that we haven't changed the flags, since that requires a
270          * flush.
271          */
272         if (vc4->rasterizer)
273                 job->flags = vc4->rasterizer->tile_raster_order_flags;
274 
275         vc4->job = job;
276 
277         return job;
278 }
279 
280 static void
vc4_submit_setup_rcl_surface(struct vc4_job * job,struct drm_vc4_submit_rcl_surface * submit_surf,struct pipe_surface * psurf,bool is_depth,bool is_write)281 vc4_submit_setup_rcl_surface(struct vc4_job *job,
282                              struct drm_vc4_submit_rcl_surface *submit_surf,
283                              struct pipe_surface *psurf,
284                              bool is_depth, bool is_write)
285 {
286         struct vc4_surface *surf = vc4_surface(psurf);
287 
288         if (!surf)
289                 return;
290 
291         struct vc4_resource *rsc = vc4_resource(psurf->texture);
292         submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
293         submit_surf->offset = surf->offset;
294 
295         if (psurf->texture->nr_samples <= 1) {
296                 if (is_depth) {
297                         submit_surf->bits =
298                                 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
299                                               VC4_LOADSTORE_TILE_BUFFER_BUFFER);
300 
301                 } else {
302                         submit_surf->bits =
303                                 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
304                                               VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
305                                 VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
306                                               VC4_LOADSTORE_TILE_BUFFER_BGR565 :
307                                               VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
308                                               VC4_LOADSTORE_TILE_BUFFER_FORMAT);
309                 }
310                 submit_surf->bits |=
311                         VC4_SET_FIELD(surf->tiling,
312                                       VC4_LOADSTORE_TILE_BUFFER_TILING);
313         } else {
314                 assert(!is_write);
315                 submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES;
316         }
317 
318         if (is_write)
319                 rsc->writes++;
320 }
321 
322 static void
vc4_submit_setup_rcl_render_config_surface(struct vc4_job * job,struct drm_vc4_submit_rcl_surface * submit_surf,struct pipe_surface * psurf)323 vc4_submit_setup_rcl_render_config_surface(struct vc4_job *job,
324                                            struct drm_vc4_submit_rcl_surface *submit_surf,
325                                            struct pipe_surface *psurf)
326 {
327         struct vc4_surface *surf = vc4_surface(psurf);
328 
329         if (!surf)
330                 return;
331 
332         struct vc4_resource *rsc = vc4_resource(psurf->texture);
333         submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
334         submit_surf->offset = surf->offset;
335 
336         if (psurf->texture->nr_samples <= 1) {
337                 submit_surf->bits =
338                         VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
339                                       VC4_RENDER_CONFIG_FORMAT_BGR565 :
340                                       VC4_RENDER_CONFIG_FORMAT_RGBA8888,
341                                       VC4_RENDER_CONFIG_FORMAT) |
342                         VC4_SET_FIELD(surf->tiling,
343                                       VC4_RENDER_CONFIG_MEMORY_FORMAT);
344         }
345 
346         rsc->writes++;
347 }
348 
349 static void
vc4_submit_setup_rcl_msaa_surface(struct vc4_job * job,struct drm_vc4_submit_rcl_surface * submit_surf,struct pipe_surface * psurf)350 vc4_submit_setup_rcl_msaa_surface(struct vc4_job *job,
351                                   struct drm_vc4_submit_rcl_surface *submit_surf,
352                                   struct pipe_surface *psurf)
353 {
354         struct vc4_surface *surf = vc4_surface(psurf);
355 
356         if (!surf)
357                 return;
358 
359         struct vc4_resource *rsc = vc4_resource(psurf->texture);
360         submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
361         submit_surf->offset = surf->offset;
362         submit_surf->bits = 0;
363         rsc->writes++;
364 }
365 
366 /**
367  * Submits the job to the kernel and then reinitializes it.
368  */
369 void
vc4_job_submit(struct vc4_context * vc4,struct vc4_job * job)370 vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
371 {
372         if (!job->needs_flush)
373                 goto done;
374 
375         /* The RCL setup would choke if the draw bounds cause no drawing, so
376          * just drop the drawing if that's the case.
377          */
378         if (job->draw_max_x <= job->draw_min_x ||
379             job->draw_max_y <= job->draw_min_y) {
380                 goto done;
381         }
382 
383         if (vc4_debug & VC4_DEBUG_CL) {
384                 fprintf(stderr, "BCL:\n");
385                 vc4_dump_cl(job->bcl.base, cl_offset(&job->bcl), false);
386         }
387 
388         if (cl_offset(&job->bcl) > 0) {
389                 /* Increment the semaphore indicating that binning is done and
390                  * unblocking the render thread.  Note that this doesn't act
391                  * until the FLUSH completes.
392                  */
393                 cl_ensure_space(&job->bcl, 8);
394                 cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
395                 /* The FLUSH caps all of our bin lists with a
396                  * VC4_PACKET_RETURN.
397                  */
398                 cl_emit(&job->bcl, FLUSH, flush);
399         }
400         struct drm_vc4_submit_cl submit = {
401                 .color_read.hindex = ~0,
402                 .zs_read.hindex = ~0,
403                 .color_write.hindex = ~0,
404                 .msaa_color_write.hindex = ~0,
405                 .zs_write.hindex = ~0,
406                 .msaa_zs_write.hindex = ~0,
407         };
408 
409         cl_ensure_space(&job->bo_handles, 6 * sizeof(uint32_t));
410         cl_ensure_space(&job->bo_pointers, 6 * sizeof(struct vc4_bo *));
411 
412         if (job->resolve & PIPE_CLEAR_COLOR) {
413                 if (!(job->cleared & PIPE_CLEAR_COLOR)) {
414                         vc4_submit_setup_rcl_surface(job, &submit.color_read,
415                                                      job->color_read,
416                                                      false, false);
417                 }
418                 vc4_submit_setup_rcl_render_config_surface(job,
419                                                            &submit.color_write,
420                                                            job->color_write);
421                 vc4_submit_setup_rcl_msaa_surface(job,
422                                                   &submit.msaa_color_write,
423                                                   job->msaa_color_write);
424         }
425         if (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
426                 if (!(job->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
427                         vc4_submit_setup_rcl_surface(job, &submit.zs_read,
428                                                      job->zs_read, true, false);
429                 }
430                 vc4_submit_setup_rcl_surface(job, &submit.zs_write,
431                                              job->zs_write, true, true);
432                 vc4_submit_setup_rcl_msaa_surface(job, &submit.msaa_zs_write,
433                                                   job->msaa_zs_write);
434         }
435 
436         if (job->msaa) {
437                 /* This bit controls how many pixels the general
438                  * (i.e. subsampled) loads/stores are iterating over
439                  * (multisample loads replicate out to the other samples).
440                  */
441                 submit.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X;
442                 /* Controls whether color_write's
443                  * VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation
444                  */
445                 submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X;
446         }
447 
448         submit.bo_handles = (uintptr_t)job->bo_handles.base;
449         submit.bo_handle_count = cl_offset(&job->bo_handles) / 4;
450         submit.bin_cl = (uintptr_t)job->bcl.base;
451         submit.bin_cl_size = cl_offset(&job->bcl);
452         submit.shader_rec = (uintptr_t)job->shader_rec.base;
453         submit.shader_rec_size = cl_offset(&job->shader_rec);
454         submit.shader_rec_count = job->shader_rec_count;
455         submit.uniforms = (uintptr_t)job->uniforms.base;
456         submit.uniforms_size = cl_offset(&job->uniforms);
457 	if (job->perfmon)
458 		submit.perfmonid = job->perfmon->id;
459 
460         assert(job->draw_min_x != ~0 && job->draw_min_y != ~0);
461         submit.min_x_tile = job->draw_min_x / job->tile_width;
462         submit.min_y_tile = job->draw_min_y / job->tile_height;
463         submit.max_x_tile = (job->draw_max_x - 1) / job->tile_width;
464         submit.max_y_tile = (job->draw_max_y - 1) / job->tile_height;
465         submit.width = job->draw_width;
466         submit.height = job->draw_height;
467         if (job->cleared) {
468                 submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
469                 submit.clear_color[0] = job->clear_color[0];
470                 submit.clear_color[1] = job->clear_color[1];
471                 submit.clear_z = job->clear_depth;
472                 submit.clear_s = job->clear_stencil;
473         }
474         submit.flags |= job->flags;
475 
476         if (vc4->screen->has_syncobj) {
477                 submit.out_sync = vc4->job_syncobj;
478 
479                 if (vc4->in_fence_fd >= 0) {
480                         /* This replaces the fence in the syncobj. */
481                         drmSyncobjImportSyncFile(vc4->fd, vc4->in_syncobj,
482                                                  vc4->in_fence_fd);
483                         submit.in_sync = vc4->in_syncobj;
484                         close(vc4->in_fence_fd);
485                         vc4->in_fence_fd = -1;
486                 }
487         }
488 
489         if (!(vc4_debug & VC4_DEBUG_NORAST)) {
490                 int ret;
491 
492                 ret = vc4_ioctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
493                 static bool warned = false;
494                 if (ret && !warned) {
495                         fprintf(stderr, "Draw call returned %s.  "
496                                         "Expect corruption.\n", strerror(errno));
497                         warned = true;
498                 } else if (!ret) {
499                         vc4->last_emit_seqno = submit.seqno;
500                         if (job->perfmon)
501                                 job->perfmon->last_seqno = submit.seqno;
502                 }
503         }
504 
505         if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) {
506                 if (!vc4_wait_seqno(vc4->screen,
507                                     vc4->last_emit_seqno - 5,
508                                     PIPE_TIMEOUT_INFINITE,
509                                     "job throttling")) {
510                         fprintf(stderr, "Job throttling failed\n");
511                 }
512         }
513 
514         if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
515                 if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
516                                     PIPE_TIMEOUT_INFINITE, "sync")) {
517                         fprintf(stderr, "Wait failed.\n");
518                         abort();
519                 }
520         }
521 
522 done:
523         vc4_job_free(vc4, job);
524 }
525 
526 static bool
vc4_job_compare(const void * a,const void * b)527 vc4_job_compare(const void *a, const void *b)
528 {
529         return memcmp(a, b, sizeof(struct vc4_job_key)) == 0;
530 }
531 
532 static uint32_t
vc4_job_hash(const void * key)533 vc4_job_hash(const void *key)
534 {
535         return _mesa_hash_data(key, sizeof(struct vc4_job_key));
536 }
537 
538 int
vc4_job_init(struct vc4_context * vc4)539 vc4_job_init(struct vc4_context *vc4)
540 {
541         vc4->jobs = _mesa_hash_table_create(vc4,
542                                             vc4_job_hash,
543                                             vc4_job_compare);
544         vc4->write_jobs = _mesa_hash_table_create(vc4,
545                                                   _mesa_hash_pointer,
546                                                   _mesa_key_pointer_equal);
547 
548         if (vc4->screen->has_syncobj) {
549                 /* Create the syncobj as signaled since with no job executed
550                  * there is nothing to wait on.
551                  */
552                 int ret = drmSyncobjCreate(vc4->fd,
553                                            DRM_SYNCOBJ_CREATE_SIGNALED,
554                                            &vc4->job_syncobj);
555                 if (ret) {
556                         /* If the screen indicated syncobj support, we should
557                          * be able to create a signaled syncobj.
558                          * At this point it is too late to pretend the screen
559                          * has no syncobj support.
560                          */
561                         return ret;
562                 }
563         }
564 
565         return 0;
566 }
567 
568