1 /*
2 * Copyright © 2014-2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file vc4_job.c
25 *
26 * Functions for submitting VC4 render jobs to the kernel.
27 */
28
29 #include <xf86drm.h>
30 #include "vc4_cl_dump.h"
31 #include "vc4_context.h"
32 #include "util/hash_table.h"
33 #include "util/perf/cpu_trace.h"
34
35 static void
vc4_job_free(struct vc4_context * vc4,struct vc4_job * job)36 vc4_job_free(struct vc4_context *vc4, struct vc4_job *job)
37 {
38 struct vc4_bo **referenced_bos = job->bo_pointers.base;
39 for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
40 vc4_bo_unreference(&referenced_bos[i]);
41 }
42
43 _mesa_hash_table_remove_key(vc4->jobs, &job->key);
44
45 if (job->color_write) {
46 _mesa_hash_table_remove_key(vc4->write_jobs,
47 job->color_write->texture);
48 pipe_surface_reference(&job->color_write, NULL);
49 }
50 if (job->msaa_color_write) {
51 _mesa_hash_table_remove_key(vc4->write_jobs,
52 job->msaa_color_write->texture);
53 pipe_surface_reference(&job->msaa_color_write, NULL);
54 }
55 if (job->zs_write) {
56 _mesa_hash_table_remove_key(vc4->write_jobs,
57 job->zs_write->texture);
58 pipe_surface_reference(&job->zs_write, NULL);
59 }
60 if (job->msaa_zs_write) {
61 _mesa_hash_table_remove_key(vc4->write_jobs,
62 job->msaa_zs_write->texture);
63 pipe_surface_reference(&job->msaa_zs_write, NULL);
64 }
65
66 pipe_surface_reference(&job->color_read, NULL);
67 pipe_surface_reference(&job->zs_read, NULL);
68
69 if (vc4->job == job)
70 vc4->job = NULL;
71
72 ralloc_free(job);
73 }
74
75 static struct vc4_job *
vc4_job_create(struct vc4_context * vc4)76 vc4_job_create(struct vc4_context *vc4)
77 {
78 struct vc4_job *job = rzalloc(vc4, struct vc4_job);
79
80 vc4_init_cl(job, &job->bcl);
81 vc4_init_cl(job, &job->shader_rec);
82 vc4_init_cl(job, &job->uniforms);
83 vc4_init_cl(job, &job->bo_handles);
84 vc4_init_cl(job, &job->bo_pointers);
85
86 job->draw_min_x = ~0;
87 job->draw_min_y = ~0;
88 job->draw_max_x = 0;
89 job->draw_max_y = 0;
90
91 job->last_gem_handle_hindex = ~0;
92
93 if (vc4->perfmon)
94 job->perfmon = vc4->perfmon;
95
96 return job;
97 }
98
99 void
vc4_flush_jobs_writing_resource(struct vc4_context * vc4,struct pipe_resource * prsc)100 vc4_flush_jobs_writing_resource(struct vc4_context *vc4,
101 struct pipe_resource *prsc)
102 {
103 struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs,
104 prsc);
105 if (entry) {
106 struct vc4_job *job = entry->data;
107
108 MESA_TRACE_FUNC();
109
110 vc4_job_submit(vc4, job);
111 }
112 }
113
114 void
vc4_flush_jobs_reading_resource(struct vc4_context * vc4,struct pipe_resource * prsc)115 vc4_flush_jobs_reading_resource(struct vc4_context *vc4,
116 struct pipe_resource *prsc)
117 {
118 struct vc4_resource *rsc = vc4_resource(prsc);
119
120 MESA_TRACE_FUNC();
121
122 vc4_flush_jobs_writing_resource(vc4, prsc);
123
124 hash_table_foreach(vc4->jobs, entry) {
125 struct vc4_job *job = entry->data;
126
127 struct vc4_bo **referenced_bos = job->bo_pointers.base;
128 bool found = false;
129 for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
130 if (referenced_bos[i] == rsc->bo) {
131 found = true;
132 break;
133 }
134 }
135 if (found) {
136 vc4_job_submit(vc4, job);
137 continue;
138 }
139
140 /* Also check for the Z/color buffers, since the references to
141 * those are only added immediately before submit.
142 */
143 if (job->color_read && !(job->cleared & PIPE_CLEAR_COLOR)) {
144 struct vc4_resource *ctex =
145 vc4_resource(job->color_read->texture);
146 if (ctex->bo == rsc->bo) {
147 vc4_job_submit(vc4, job);
148 continue;
149 }
150 }
151
152 if (job->zs_read && !(job->cleared &
153 (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
154 struct vc4_resource *ztex =
155 vc4_resource(job->zs_read->texture);
156 if (ztex->bo == rsc->bo) {
157 vc4_job_submit(vc4, job);
158 continue;
159 }
160 }
161 }
162 }
163
164 /**
165 * Returns a vc4_job structure for tracking V3D rendering to a particular FBO.
166 *
167 * If we've already started rendering to this FBO, then return old same job,
168 * otherwise make a new one. If we're beginning rendering to an FBO, make
169 * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
170 * have been flushed.
171 */
172 struct vc4_job *
vc4_get_job(struct vc4_context * vc4,struct pipe_surface * cbuf,struct pipe_surface * zsbuf)173 vc4_get_job(struct vc4_context *vc4,
174 struct pipe_surface *cbuf, struct pipe_surface *zsbuf)
175 {
176 /* Return the existing job for this FBO if we have one */
177 struct vc4_job_key local_key = {.cbuf = cbuf, .zsbuf = zsbuf};
178 struct hash_entry *entry = _mesa_hash_table_search(vc4->jobs,
179 &local_key);
180 if (entry)
181 return entry->data;
182
183 /* Creating a new job. Make sure that any previous jobs reading or
184 * writing these buffers are flushed.
185 */
186 if (cbuf)
187 vc4_flush_jobs_reading_resource(vc4, cbuf->texture);
188 if (zsbuf)
189 vc4_flush_jobs_reading_resource(vc4, zsbuf->texture);
190
191 struct vc4_job *job = vc4_job_create(vc4);
192
193 if (cbuf) {
194 if (cbuf->texture->nr_samples > 1) {
195 job->msaa = true;
196 pipe_surface_reference(&job->msaa_color_write, cbuf);
197 } else {
198 pipe_surface_reference(&job->color_write, cbuf);
199 }
200 }
201
202 if (zsbuf) {
203 if (zsbuf->texture->nr_samples > 1) {
204 job->msaa = true;
205 pipe_surface_reference(&job->msaa_zs_write, zsbuf);
206 } else {
207 pipe_surface_reference(&job->zs_write, zsbuf);
208 }
209 }
210
211 if (job->msaa) {
212 job->tile_width = 32;
213 job->tile_height = 32;
214 } else {
215 job->tile_width = 64;
216 job->tile_height = 64;
217 }
218
219 if (cbuf)
220 _mesa_hash_table_insert(vc4->write_jobs, cbuf->texture, job);
221 if (zsbuf)
222 _mesa_hash_table_insert(vc4->write_jobs, zsbuf->texture, job);
223
224 job->key.cbuf = cbuf;
225 job->key.zsbuf = zsbuf;
226 _mesa_hash_table_insert(vc4->jobs, &job->key, job);
227
228 return job;
229 }
230
231 struct vc4_job *
vc4_get_job_for_fbo(struct vc4_context * vc4)232 vc4_get_job_for_fbo(struct vc4_context *vc4)
233 {
234 if (vc4->job)
235 return vc4->job;
236
237 struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];
238 struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;
239 struct vc4_job *job = vc4_get_job(vc4, cbuf, zsbuf);
240
241 /* The dirty flags are tracking what's been updated while vc4->job has
242 * been bound, so set them all to ~0 when switching between jobs. We
243 * also need to reset all state at the start of rendering.
244 */
245 vc4->dirty = ~0;
246
247 /* Set up the read surfaces in the job. If they aren't actually
248 * getting read (due to a clear starting the frame), job->cleared will
249 * mask out the read.
250 */
251 pipe_surface_reference(&job->color_read, cbuf);
252 pipe_surface_reference(&job->zs_read, zsbuf);
253
254 /* If we're binding to uninitialized buffers, no need to load their
255 * contents before drawing.
256 */
257 if (cbuf) {
258 struct vc4_resource *rsc = vc4_resource(cbuf->texture);
259 if (!rsc->writes)
260 job->cleared |= PIPE_CLEAR_COLOR0;
261 }
262
263 if (zsbuf) {
264 struct vc4_resource *rsc = vc4_resource(zsbuf->texture);
265 if (!rsc->writes)
266 job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
267 }
268
269 job->draw_tiles_x = DIV_ROUND_UP(vc4->framebuffer.width,
270 job->tile_width);
271 job->draw_tiles_y = DIV_ROUND_UP(vc4->framebuffer.height,
272 job->tile_height);
273
274 /* Initialize the job with the raster order flags -- each draw will
275 * check that we haven't changed the flags, since that requires a
276 * flush.
277 */
278 if (vc4->rasterizer)
279 job->flags = vc4->rasterizer->tile_raster_order_flags;
280
281 vc4->job = job;
282
283 return job;
284 }
285
286 static void
vc4_submit_setup_rcl_surface(struct vc4_job * job,struct drm_vc4_submit_rcl_surface * submit_surf,struct pipe_surface * psurf,bool is_depth,bool is_write)287 vc4_submit_setup_rcl_surface(struct vc4_job *job,
288 struct drm_vc4_submit_rcl_surface *submit_surf,
289 struct pipe_surface *psurf,
290 bool is_depth, bool is_write)
291 {
292 struct vc4_surface *surf = vc4_surface(psurf);
293
294 if (!surf)
295 return;
296
297 struct vc4_resource *rsc = vc4_resource(psurf->texture);
298 submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
299 submit_surf->offset = surf->offset;
300
301 if (psurf->texture->nr_samples <= 1) {
302 if (is_depth) {
303 submit_surf->bits =
304 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
305 VC4_LOADSTORE_TILE_BUFFER_BUFFER);
306
307 } else {
308 submit_surf->bits =
309 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
310 VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
311 VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
312 VC4_LOADSTORE_TILE_BUFFER_BGR565 :
313 VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
314 VC4_LOADSTORE_TILE_BUFFER_FORMAT);
315 }
316 submit_surf->bits |=
317 VC4_SET_FIELD(surf->tiling,
318 VC4_LOADSTORE_TILE_BUFFER_TILING);
319 } else {
320 assert(!is_write);
321 submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES;
322 }
323
324 if (is_write)
325 rsc->writes++;
326 }
327
328 static void
vc4_submit_setup_rcl_render_config_surface(struct vc4_job * job,struct drm_vc4_submit_rcl_surface * submit_surf,struct pipe_surface * psurf)329 vc4_submit_setup_rcl_render_config_surface(struct vc4_job *job,
330 struct drm_vc4_submit_rcl_surface *submit_surf,
331 struct pipe_surface *psurf)
332 {
333 struct vc4_surface *surf = vc4_surface(psurf);
334
335 if (!surf)
336 return;
337
338 struct vc4_resource *rsc = vc4_resource(psurf->texture);
339 submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
340 submit_surf->offset = surf->offset;
341
342 if (psurf->texture->nr_samples <= 1) {
343 submit_surf->bits =
344 VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
345 VC4_RENDER_CONFIG_FORMAT_BGR565 :
346 VC4_RENDER_CONFIG_FORMAT_RGBA8888,
347 VC4_RENDER_CONFIG_FORMAT) |
348 VC4_SET_FIELD(surf->tiling,
349 VC4_RENDER_CONFIG_MEMORY_FORMAT);
350 }
351
352 rsc->writes++;
353 }
354
355 static void
vc4_submit_setup_rcl_msaa_surface(struct vc4_job * job,struct drm_vc4_submit_rcl_surface * submit_surf,struct pipe_surface * psurf)356 vc4_submit_setup_rcl_msaa_surface(struct vc4_job *job,
357 struct drm_vc4_submit_rcl_surface *submit_surf,
358 struct pipe_surface *psurf)
359 {
360 struct vc4_surface *surf = vc4_surface(psurf);
361
362 if (!surf)
363 return;
364
365 struct vc4_resource *rsc = vc4_resource(psurf->texture);
366 submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
367 submit_surf->offset = surf->offset;
368 submit_surf->bits = 0;
369 rsc->writes++;
370 }
371
/**
 * Submits the job to the kernel and then frees it.
 *
 * Jobs that do not need a flush, or whose draw bounds are empty, are freed
 * without being submitted.  Otherwise the binner command list is terminated,
 * the submit descriptor is filled in from the job's surfaces, command lists,
 * draw bounds and clear state, and the DRM_IOCTL_VC4_SUBMIT_CL ioctl is
 * issued (unless the NORAST debug flag is set).  After submission the
 * function may block to throttle the number of jobs in flight.
 *
 * In every case the job is destroyed via vc4_job_free() before returning, so
 * the caller must not use @job afterwards.
 */
void
vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
{
        MESA_TRACE_FUNC();

        /* Nothing in this job requires a flush: just free it. */
        if (!job->needs_flush)
                goto done;

        /* The RCL setup would choke if the draw bounds cause no drawing, so
         * just drop the drawing if that's the case.
         */
        if (job->draw_max_x <= job->draw_min_x ||
            job->draw_max_y <= job->draw_min_y) {
                goto done;
        }

        if (VC4_DBG(CL)) {
                fprintf(stderr, "BCL:\n");
                vc4_dump_cl(job->bcl.base, cl_offset(&job->bcl), false);
        }

        if (cl_offset(&job->bcl) > 0) {
                /* Increment the semaphore indicating that binning is done and
                 * unblocking the render thread.  Note that this doesn't act
                 * until the FLUSH completes.
                 */
                cl_ensure_space(&job->bcl, 8);
                cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
                /* The FLUSH caps all of our bin lists with a
                 * VC4_PACKET_RETURN.
                 */
                cl_emit(&job->bcl, FLUSH, flush);
        }
        /* Every surface starts out with hindex ~0; only the surfaces set up
         * below get a real handle index.
         * NOTE(review): ~0 presumably tells the kernel the surface is unused
         * -- confirm against the vc4 UAPI.
         */
        struct drm_vc4_submit_cl submit = {
                .color_read.hindex = ~0,
                .zs_read.hindex = ~0,
                .color_write.hindex = ~0,
                .msaa_color_write.hindex = ~0,
                .zs_write.hindex = ~0,
                .msaa_zs_write.hindex = ~0,
        };

        /* Reserve room for six more entries in both BO lists, matching the
         * six surfaces in the submit descriptor.
         * NOTE(review): presumably because the vc4_gem_hindex() calls in the
         * setup helpers below may append to these lists -- confirm.
         */
        cl_ensure_space(&job->bo_handles, 6 * sizeof(uint32_t));
        cl_ensure_space(&job->bo_pointers, 6 * sizeof(struct vc4_bo *));

        if (job->resolve & PIPE_CLEAR_COLOR) {
                /* The color buffer only needs loading if it wasn't cleared. */
                if (!(job->cleared & PIPE_CLEAR_COLOR)) {
                        vc4_submit_setup_rcl_surface(job, &submit.color_read,
                                                     job->color_read,
                                                     false, false);
                }
                vc4_submit_setup_rcl_render_config_surface(job,
                                                           &submit.color_write,
                                                           job->color_write);
                vc4_submit_setup_rcl_msaa_surface(job,
                                                  &submit.msaa_color_write,
                                                  job->msaa_color_write);
        }
        if (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
                /* Likewise, skip the Z/stencil load if it was cleared. */
                if (!(job->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
                        vc4_submit_setup_rcl_surface(job, &submit.zs_read,
                                                     job->zs_read, true, false);
                }
                vc4_submit_setup_rcl_surface(job, &submit.zs_write,
                                             job->zs_write, true, true);
                vc4_submit_setup_rcl_msaa_surface(job, &submit.msaa_zs_write,
                                                  job->msaa_zs_write);
        }

        if (job->msaa) {
                /* This bit controls how many pixels the general
                 * (i.e. subsampled) loads/stores are iterating over
                 * (multisample loads replicate out to the other samples).
                 */
                submit.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X;
                /* Controls whether color_write's
                 * VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation
                 */
                submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X;
        }

        /* Hand the kernel the user-space addresses and sizes of the job's
         * lists.  The BO handle count is the handle list size divided by 4.
         */
        submit.bo_handles = (uintptr_t)job->bo_handles.base;
        submit.bo_handle_count = cl_offset(&job->bo_handles) / 4;
        submit.bin_cl = (uintptr_t)job->bcl.base;
        submit.bin_cl_size = cl_offset(&job->bcl);
        submit.shader_rec = (uintptr_t)job->shader_rec.base;
        submit.shader_rec_size = cl_offset(&job->shader_rec);
        submit.shader_rec_count = job->shader_rec_count;
        submit.uniforms = (uintptr_t)job->uniforms.base;
        submit.uniforms_size = cl_offset(&job->uniforms);
        if (job->perfmon)
                submit.perfmonid = job->perfmon->id;

        /* Convert the pixel draw bounds into inclusive tile coordinates;
         * draw_max_* is exclusive, hence the "- 1".
         */
        assert(job->draw_min_x != ~0 && job->draw_min_y != ~0);
        submit.min_x_tile = job->draw_min_x / job->tile_width;
        submit.min_y_tile = job->draw_min_y / job->tile_height;
        submit.max_x_tile = (job->draw_max_x - 1) / job->tile_width;
        submit.max_y_tile = (job->draw_max_y - 1) / job->tile_height;
        submit.width = job->draw_width;
        submit.height = job->draw_height;
        if (job->cleared) {
                submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
                submit.clear_color[0] = job->clear_color[0];
                submit.clear_color[1] = job->clear_color[1];
                submit.clear_z = job->clear_depth;
                submit.clear_s = job->clear_stencil;
        }
        submit.flags |= job->flags;

        if (vc4->screen->has_syncobj) {
                submit.out_sync = vc4->job_syncobj;

                /* A pending in-fence is consumed by this submit: it is
                 * imported into the in-syncobj, then the fd is closed and
                 * reset so it is not used again.
                 */
                if (vc4->in_fence_fd >= 0) {
                        /* This replaces the fence in the syncobj. */
                        drmSyncobjImportSyncFile(vc4->fd, vc4->in_syncobj,
                                                 vc4->in_fence_fd);
                        submit.in_sync = vc4->in_syncobj;
                        close(vc4->in_fence_fd);
                        vc4->in_fence_fd = -1;
                }
        }

        /* With the NORAST debug flag set the ioctl is skipped entirely. */
        if (!VC4_DBG(NORAST)) {
                int ret;

                ret = vc4_ioctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
                /* Only the first failed submit is reported. */
                static bool warned = false;
                if (ret && !warned) {
                        fprintf(stderr, "Draw call returned %s.  "
                                        "Expect corruption.\n", strerror(errno));
                        warned = true;
                } else if (!ret) {
                        vc4->last_emit_seqno = submit.seqno;
                        if (job->perfmon)
                                job->perfmon->last_seqno = submit.seqno;
                }
        }

        /* Throttle: once more than 5 seqnos separate the last emitted job
         * from the last one the screen saw finish, wait until only 5 remain
         * outstanding.
         */
        if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) {
                if (!vc4_wait_seqno(vc4->screen,
                                    vc4->last_emit_seqno - 5,
                                    OS_TIMEOUT_INFINITE,
                                    "job throttling")) {
                        fprintf(stderr, "Job throttling failed\n");
                }
        }

        /* Debug option: wait for the job just emitted, aborting on failure. */
        if (VC4_DBG(ALWAYS_SYNC)) {
                if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
                                    OS_TIMEOUT_INFINITE, "sync")) {
                        fprintf(stderr, "Wait failed.\n");
                        abort();
                }
        }

done:
        vc4_job_free(vc4, job);
}
533
534 static bool
vc4_job_compare(const void * a,const void * b)535 vc4_job_compare(const void *a, const void *b)
536 {
537 return memcmp(a, b, sizeof(struct vc4_job_key)) == 0;
538 }
539
540 static uint32_t
vc4_job_hash(const void * key)541 vc4_job_hash(const void *key)
542 {
543 return _mesa_hash_data(key, sizeof(struct vc4_job_key));
544 }
545
546 int
vc4_job_init(struct vc4_context * vc4)547 vc4_job_init(struct vc4_context *vc4)
548 {
549 vc4->jobs = _mesa_hash_table_create(vc4,
550 vc4_job_hash,
551 vc4_job_compare);
552 vc4->write_jobs = _mesa_hash_table_create(vc4,
553 _mesa_hash_pointer,
554 _mesa_key_pointer_equal);
555
556 if (vc4->screen->has_syncobj) {
557 /* Create the syncobj as signaled since with no job executed
558 * there is nothing to wait on.
559 */
560 int ret = drmSyncobjCreate(vc4->fd,
561 DRM_SYNCOBJ_CREATE_SIGNALED,
562 &vc4->job_syncobj);
563 if (ret) {
564 /* If the screen indicated syncobj support, we should
565 * be able to create a signaled syncobj.
566 * At this point it is too late to pretend the screen
567 * has no syncobj support.
568 */
569 return ret;
570 }
571 }
572
573 return 0;
574 }
575
576