1 /*
2 * Copyright © 2014-2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file vc4_job.c
25 *
26 * Functions for submitting VC4 render jobs to the kernel.
27 */
28
29 #include <xf86drm.h>
30 #include "vc4_cl_dump.h"
31 #include "vc4_context.h"
32 #include "util/hash_table.h"
33
34 static void
vc4_job_free(struct vc4_context * vc4,struct vc4_job * job)35 vc4_job_free(struct vc4_context *vc4, struct vc4_job *job)
36 {
37 struct vc4_bo **referenced_bos = job->bo_pointers.base;
38 for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
39 vc4_bo_unreference(&referenced_bos[i]);
40 }
41
42 _mesa_hash_table_remove_key(vc4->jobs, &job->key);
43
44 if (job->color_write) {
45 _mesa_hash_table_remove_key(vc4->write_jobs,
46 job->color_write->texture);
47 pipe_surface_reference(&job->color_write, NULL);
48 }
49 if (job->msaa_color_write) {
50 _mesa_hash_table_remove_key(vc4->write_jobs,
51 job->msaa_color_write->texture);
52 pipe_surface_reference(&job->msaa_color_write, NULL);
53 }
54 if (job->zs_write) {
55 _mesa_hash_table_remove_key(vc4->write_jobs,
56 job->zs_write->texture);
57 pipe_surface_reference(&job->zs_write, NULL);
58 }
59 if (job->msaa_zs_write) {
60 _mesa_hash_table_remove_key(vc4->write_jobs,
61 job->msaa_zs_write->texture);
62 pipe_surface_reference(&job->msaa_zs_write, NULL);
63 }
64
65 pipe_surface_reference(&job->color_read, NULL);
66 pipe_surface_reference(&job->zs_read, NULL);
67
68 if (vc4->job == job)
69 vc4->job = NULL;
70
71 ralloc_free(job);
72 }
73
74 static struct vc4_job *
vc4_job_create(struct vc4_context * vc4)75 vc4_job_create(struct vc4_context *vc4)
76 {
77 struct vc4_job *job = rzalloc(vc4, struct vc4_job);
78
79 vc4_init_cl(job, &job->bcl);
80 vc4_init_cl(job, &job->shader_rec);
81 vc4_init_cl(job, &job->uniforms);
82 vc4_init_cl(job, &job->bo_handles);
83 vc4_init_cl(job, &job->bo_pointers);
84
85 job->draw_min_x = ~0;
86 job->draw_min_y = ~0;
87 job->draw_max_x = 0;
88 job->draw_max_y = 0;
89
90 job->last_gem_handle_hindex = ~0;
91
92 if (vc4->perfmon)
93 job->perfmon = vc4->perfmon;
94
95 return job;
96 }
97
98 void
vc4_flush_jobs_writing_resource(struct vc4_context * vc4,struct pipe_resource * prsc)99 vc4_flush_jobs_writing_resource(struct vc4_context *vc4,
100 struct pipe_resource *prsc)
101 {
102 struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs,
103 prsc);
104 if (entry) {
105 struct vc4_job *job = entry->data;
106 vc4_job_submit(vc4, job);
107 }
108 }
109
110 void
vc4_flush_jobs_reading_resource(struct vc4_context * vc4,struct pipe_resource * prsc)111 vc4_flush_jobs_reading_resource(struct vc4_context *vc4,
112 struct pipe_resource *prsc)
113 {
114 struct vc4_resource *rsc = vc4_resource(prsc);
115
116 vc4_flush_jobs_writing_resource(vc4, prsc);
117
118 hash_table_foreach(vc4->jobs, entry) {
119 struct vc4_job *job = entry->data;
120
121 struct vc4_bo **referenced_bos = job->bo_pointers.base;
122 bool found = false;
123 for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
124 if (referenced_bos[i] == rsc->bo) {
125 found = true;
126 break;
127 }
128 }
129 if (found) {
130 vc4_job_submit(vc4, job);
131 continue;
132 }
133
134 /* Also check for the Z/color buffers, since the references to
135 * those are only added immediately before submit.
136 */
137 if (job->color_read && !(job->cleared & PIPE_CLEAR_COLOR)) {
138 struct vc4_resource *ctex =
139 vc4_resource(job->color_read->texture);
140 if (ctex->bo == rsc->bo) {
141 vc4_job_submit(vc4, job);
142 continue;
143 }
144 }
145
146 if (job->zs_read && !(job->cleared &
147 (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
148 struct vc4_resource *ztex =
149 vc4_resource(job->zs_read->texture);
150 if (ztex->bo == rsc->bo) {
151 vc4_job_submit(vc4, job);
152 continue;
153 }
154 }
155 }
156 }
157
158 /**
159 * Returns a vc4_job struture for tracking V3D rendering to a particular FBO.
160 *
161 * If we've already started rendering to this FBO, then return old same job,
162 * otherwise make a new one. If we're beginning rendering to an FBO, make
163 * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
164 * have been flushed.
165 */
166 struct vc4_job *
vc4_get_job(struct vc4_context * vc4,struct pipe_surface * cbuf,struct pipe_surface * zsbuf)167 vc4_get_job(struct vc4_context *vc4,
168 struct pipe_surface *cbuf, struct pipe_surface *zsbuf)
169 {
170 /* Return the existing job for this FBO if we have one */
171 struct vc4_job_key local_key = {.cbuf = cbuf, .zsbuf = zsbuf};
172 struct hash_entry *entry = _mesa_hash_table_search(vc4->jobs,
173 &local_key);
174 if (entry)
175 return entry->data;
176
177 /* Creating a new job. Make sure that any previous jobs reading or
178 * writing these buffers are flushed.
179 */
180 if (cbuf)
181 vc4_flush_jobs_reading_resource(vc4, cbuf->texture);
182 if (zsbuf)
183 vc4_flush_jobs_reading_resource(vc4, zsbuf->texture);
184
185 struct vc4_job *job = vc4_job_create(vc4);
186
187 if (cbuf) {
188 if (cbuf->texture->nr_samples > 1) {
189 job->msaa = true;
190 pipe_surface_reference(&job->msaa_color_write, cbuf);
191 } else {
192 pipe_surface_reference(&job->color_write, cbuf);
193 }
194 }
195
196 if (zsbuf) {
197 if (zsbuf->texture->nr_samples > 1) {
198 job->msaa = true;
199 pipe_surface_reference(&job->msaa_zs_write, zsbuf);
200 } else {
201 pipe_surface_reference(&job->zs_write, zsbuf);
202 }
203 }
204
205 if (job->msaa) {
206 job->tile_width = 32;
207 job->tile_height = 32;
208 } else {
209 job->tile_width = 64;
210 job->tile_height = 64;
211 }
212
213 if (cbuf)
214 _mesa_hash_table_insert(vc4->write_jobs, cbuf->texture, job);
215 if (zsbuf)
216 _mesa_hash_table_insert(vc4->write_jobs, zsbuf->texture, job);
217
218 job->key.cbuf = cbuf;
219 job->key.zsbuf = zsbuf;
220 _mesa_hash_table_insert(vc4->jobs, &job->key, job);
221
222 return job;
223 }
224
225 struct vc4_job *
vc4_get_job_for_fbo(struct vc4_context * vc4)226 vc4_get_job_for_fbo(struct vc4_context *vc4)
227 {
228 if (vc4->job)
229 return vc4->job;
230
231 struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];
232 struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;
233 struct vc4_job *job = vc4_get_job(vc4, cbuf, zsbuf);
234
235 /* The dirty flags are tracking what's been updated while vc4->job has
236 * been bound, so set them all to ~0 when switching between jobs. We
237 * also need to reset all state at the start of rendering.
238 */
239 vc4->dirty = ~0;
240
241 /* Set up the read surfaces in the job. If they aren't actually
242 * getting read (due to a clear starting the frame), job->cleared will
243 * mask out the read.
244 */
245 pipe_surface_reference(&job->color_read, cbuf);
246 pipe_surface_reference(&job->zs_read, zsbuf);
247
248 /* If we're binding to uninitialized buffers, no need to load their
249 * contents before drawing.
250 */
251 if (cbuf) {
252 struct vc4_resource *rsc = vc4_resource(cbuf->texture);
253 if (!rsc->writes)
254 job->cleared |= PIPE_CLEAR_COLOR0;
255 }
256
257 if (zsbuf) {
258 struct vc4_resource *rsc = vc4_resource(zsbuf->texture);
259 if (!rsc->writes)
260 job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
261 }
262
263 job->draw_tiles_x = DIV_ROUND_UP(vc4->framebuffer.width,
264 job->tile_width);
265 job->draw_tiles_y = DIV_ROUND_UP(vc4->framebuffer.height,
266 job->tile_height);
267
268 /* Initialize the job with the raster order flags -- each draw will
269 * check that we haven't changed the flags, since that requires a
270 * flush.
271 */
272 if (vc4->rasterizer)
273 job->flags = vc4->rasterizer->tile_raster_order_flags;
274
275 vc4->job = job;
276
277 return job;
278 }
279
280 static void
vc4_submit_setup_rcl_surface(struct vc4_job * job,struct drm_vc4_submit_rcl_surface * submit_surf,struct pipe_surface * psurf,bool is_depth,bool is_write)281 vc4_submit_setup_rcl_surface(struct vc4_job *job,
282 struct drm_vc4_submit_rcl_surface *submit_surf,
283 struct pipe_surface *psurf,
284 bool is_depth, bool is_write)
285 {
286 struct vc4_surface *surf = vc4_surface(psurf);
287
288 if (!surf)
289 return;
290
291 struct vc4_resource *rsc = vc4_resource(psurf->texture);
292 submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
293 submit_surf->offset = surf->offset;
294
295 if (psurf->texture->nr_samples <= 1) {
296 if (is_depth) {
297 submit_surf->bits =
298 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
299 VC4_LOADSTORE_TILE_BUFFER_BUFFER);
300
301 } else {
302 submit_surf->bits =
303 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
304 VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
305 VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
306 VC4_LOADSTORE_TILE_BUFFER_BGR565 :
307 VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
308 VC4_LOADSTORE_TILE_BUFFER_FORMAT);
309 }
310 submit_surf->bits |=
311 VC4_SET_FIELD(surf->tiling,
312 VC4_LOADSTORE_TILE_BUFFER_TILING);
313 } else {
314 assert(!is_write);
315 submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES;
316 }
317
318 if (is_write)
319 rsc->writes++;
320 }
321
322 static void
vc4_submit_setup_rcl_render_config_surface(struct vc4_job * job,struct drm_vc4_submit_rcl_surface * submit_surf,struct pipe_surface * psurf)323 vc4_submit_setup_rcl_render_config_surface(struct vc4_job *job,
324 struct drm_vc4_submit_rcl_surface *submit_surf,
325 struct pipe_surface *psurf)
326 {
327 struct vc4_surface *surf = vc4_surface(psurf);
328
329 if (!surf)
330 return;
331
332 struct vc4_resource *rsc = vc4_resource(psurf->texture);
333 submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
334 submit_surf->offset = surf->offset;
335
336 if (psurf->texture->nr_samples <= 1) {
337 submit_surf->bits =
338 VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
339 VC4_RENDER_CONFIG_FORMAT_BGR565 :
340 VC4_RENDER_CONFIG_FORMAT_RGBA8888,
341 VC4_RENDER_CONFIG_FORMAT) |
342 VC4_SET_FIELD(surf->tiling,
343 VC4_RENDER_CONFIG_MEMORY_FORMAT);
344 }
345
346 rsc->writes++;
347 }
348
349 static void
vc4_submit_setup_rcl_msaa_surface(struct vc4_job * job,struct drm_vc4_submit_rcl_surface * submit_surf,struct pipe_surface * psurf)350 vc4_submit_setup_rcl_msaa_surface(struct vc4_job *job,
351 struct drm_vc4_submit_rcl_surface *submit_surf,
352 struct pipe_surface *psurf)
353 {
354 struct vc4_surface *surf = vc4_surface(psurf);
355
356 if (!surf)
357 return;
358
359 struct vc4_resource *rsc = vc4_resource(psurf->texture);
360 submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
361 submit_surf->offset = surf->offset;
362 submit_surf->bits = 0;
363 rsc->writes++;
364 }
365
366 /**
367 * Submits the job to the kernel and then reinitializes it.
368 */
369 void
vc4_job_submit(struct vc4_context * vc4,struct vc4_job * job)370 vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
371 {
372 if (!job->needs_flush)
373 goto done;
374
375 /* The RCL setup would choke if the draw bounds cause no drawing, so
376 * just drop the drawing if that's the case.
377 */
378 if (job->draw_max_x <= job->draw_min_x ||
379 job->draw_max_y <= job->draw_min_y) {
380 goto done;
381 }
382
383 if (vc4_debug & VC4_DEBUG_CL) {
384 fprintf(stderr, "BCL:\n");
385 vc4_dump_cl(job->bcl.base, cl_offset(&job->bcl), false);
386 }
387
388 if (cl_offset(&job->bcl) > 0) {
389 /* Increment the semaphore indicating that binning is done and
390 * unblocking the render thread. Note that this doesn't act
391 * until the FLUSH completes.
392 */
393 cl_ensure_space(&job->bcl, 8);
394 cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
395 /* The FLUSH caps all of our bin lists with a
396 * VC4_PACKET_RETURN.
397 */
398 cl_emit(&job->bcl, FLUSH, flush);
399 }
400 struct drm_vc4_submit_cl submit = {
401 .color_read.hindex = ~0,
402 .zs_read.hindex = ~0,
403 .color_write.hindex = ~0,
404 .msaa_color_write.hindex = ~0,
405 .zs_write.hindex = ~0,
406 .msaa_zs_write.hindex = ~0,
407 };
408
409 cl_ensure_space(&job->bo_handles, 6 * sizeof(uint32_t));
410 cl_ensure_space(&job->bo_pointers, 6 * sizeof(struct vc4_bo *));
411
412 if (job->resolve & PIPE_CLEAR_COLOR) {
413 if (!(job->cleared & PIPE_CLEAR_COLOR)) {
414 vc4_submit_setup_rcl_surface(job, &submit.color_read,
415 job->color_read,
416 false, false);
417 }
418 vc4_submit_setup_rcl_render_config_surface(job,
419 &submit.color_write,
420 job->color_write);
421 vc4_submit_setup_rcl_msaa_surface(job,
422 &submit.msaa_color_write,
423 job->msaa_color_write);
424 }
425 if (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
426 if (!(job->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
427 vc4_submit_setup_rcl_surface(job, &submit.zs_read,
428 job->zs_read, true, false);
429 }
430 vc4_submit_setup_rcl_surface(job, &submit.zs_write,
431 job->zs_write, true, true);
432 vc4_submit_setup_rcl_msaa_surface(job, &submit.msaa_zs_write,
433 job->msaa_zs_write);
434 }
435
436 if (job->msaa) {
437 /* This bit controls how many pixels the general
438 * (i.e. subsampled) loads/stores are iterating over
439 * (multisample loads replicate out to the other samples).
440 */
441 submit.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X;
442 /* Controls whether color_write's
443 * VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation
444 */
445 submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X;
446 }
447
448 submit.bo_handles = (uintptr_t)job->bo_handles.base;
449 submit.bo_handle_count = cl_offset(&job->bo_handles) / 4;
450 submit.bin_cl = (uintptr_t)job->bcl.base;
451 submit.bin_cl_size = cl_offset(&job->bcl);
452 submit.shader_rec = (uintptr_t)job->shader_rec.base;
453 submit.shader_rec_size = cl_offset(&job->shader_rec);
454 submit.shader_rec_count = job->shader_rec_count;
455 submit.uniforms = (uintptr_t)job->uniforms.base;
456 submit.uniforms_size = cl_offset(&job->uniforms);
457 if (job->perfmon)
458 submit.perfmonid = job->perfmon->id;
459
460 assert(job->draw_min_x != ~0 && job->draw_min_y != ~0);
461 submit.min_x_tile = job->draw_min_x / job->tile_width;
462 submit.min_y_tile = job->draw_min_y / job->tile_height;
463 submit.max_x_tile = (job->draw_max_x - 1) / job->tile_width;
464 submit.max_y_tile = (job->draw_max_y - 1) / job->tile_height;
465 submit.width = job->draw_width;
466 submit.height = job->draw_height;
467 if (job->cleared) {
468 submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
469 submit.clear_color[0] = job->clear_color[0];
470 submit.clear_color[1] = job->clear_color[1];
471 submit.clear_z = job->clear_depth;
472 submit.clear_s = job->clear_stencil;
473 }
474 submit.flags |= job->flags;
475
476 if (vc4->screen->has_syncobj) {
477 submit.out_sync = vc4->job_syncobj;
478
479 if (vc4->in_fence_fd >= 0) {
480 /* This replaces the fence in the syncobj. */
481 drmSyncobjImportSyncFile(vc4->fd, vc4->in_syncobj,
482 vc4->in_fence_fd);
483 submit.in_sync = vc4->in_syncobj;
484 close(vc4->in_fence_fd);
485 vc4->in_fence_fd = -1;
486 }
487 }
488
489 if (!(vc4_debug & VC4_DEBUG_NORAST)) {
490 int ret;
491
492 ret = vc4_ioctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
493 static bool warned = false;
494 if (ret && !warned) {
495 fprintf(stderr, "Draw call returned %s. "
496 "Expect corruption.\n", strerror(errno));
497 warned = true;
498 } else if (!ret) {
499 vc4->last_emit_seqno = submit.seqno;
500 if (job->perfmon)
501 job->perfmon->last_seqno = submit.seqno;
502 }
503 }
504
505 if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) {
506 if (!vc4_wait_seqno(vc4->screen,
507 vc4->last_emit_seqno - 5,
508 PIPE_TIMEOUT_INFINITE,
509 "job throttling")) {
510 fprintf(stderr, "Job throttling failed\n");
511 }
512 }
513
514 if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
515 if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
516 PIPE_TIMEOUT_INFINITE, "sync")) {
517 fprintf(stderr, "Wait failed.\n");
518 abort();
519 }
520 }
521
522 done:
523 vc4_job_free(vc4, job);
524 }
525
526 static bool
vc4_job_compare(const void * a,const void * b)527 vc4_job_compare(const void *a, const void *b)
528 {
529 return memcmp(a, b, sizeof(struct vc4_job_key)) == 0;
530 }
531
532 static uint32_t
vc4_job_hash(const void * key)533 vc4_job_hash(const void *key)
534 {
535 return _mesa_hash_data(key, sizeof(struct vc4_job_key));
536 }
537
538 int
vc4_job_init(struct vc4_context * vc4)539 vc4_job_init(struct vc4_context *vc4)
540 {
541 vc4->jobs = _mesa_hash_table_create(vc4,
542 vc4_job_hash,
543 vc4_job_compare);
544 vc4->write_jobs = _mesa_hash_table_create(vc4,
545 _mesa_hash_pointer,
546 _mesa_key_pointer_equal);
547
548 if (vc4->screen->has_syncobj) {
549 /* Create the syncobj as signaled since with no job executed
550 * there is nothing to wait on.
551 */
552 int ret = drmSyncobjCreate(vc4->fd,
553 DRM_SYNCOBJ_CREATE_SIGNALED,
554 &vc4->job_syncobj);
555 if (ret) {
556 /* If the screen indicated syncobj support, we should
557 * be able to create a signaled syncobj.
558 * At this point it is too late to pretend the screen
559 * has no syncobj support.
560 */
561 return ret;
562 }
563 }
564
565 return 0;
566 }
567
568