/*
 * Copyright (C) 2019-2020 Collabora, Ltd.
 * Copyright (C) 2019 Alyssa Rosenzweig
 * Copyright (C) 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <assert.h>

#include "drm-uapi/panfrost_drm.h"

#include "pan_bo.h"
#include "pan_context.h"
#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/format/u_format.h"
#include "util/u_pack_color.h"
#include "util/rounding.h"
#include "util/u_framebuffer.h"
#include "pan_util.h"
#include "decode.h"

#define foreach_batch(ctx, idx) \
        BITSET_FOREACH_SET(idx, ctx->batches.active, PAN_MAX_BATCHES)

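/* Usage sketch (mirrors the call sites later in this file): iterate every
 * active batch slot by index, e.g.
 *
 *    unsigned i;
 *    foreach_batch(ctx, i) {
 *            struct panfrost_batch *batch = &ctx->batches.slots[i];
 *            ...
 *    }
 */
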
static unsigned
panfrost_batch_idx(struct panfrost_batch *batch)
{
        return batch - batch->ctx->batches.slots;
}

/* Adds the BO backing surface to a batch if the surface is non-null */

static void
panfrost_batch_add_surface(struct panfrost_batch *batch, struct pipe_surface *surf)
{
        if (surf) {
                struct panfrost_resource *rsrc = pan_resource(surf->texture);
                panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_FRAGMENT);
        }
}

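/* Initialize a free batch slot for the given framebuffer key: assign a fresh
 * seqnum, reset the scissor extents, set up BO/resource tracking and the
 * per-batch pools, and reference the colour and depth/stencil surfaces so
 * their backing BOs live as long as the batch. */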
static void
panfrost_batch_init(struct panfrost_context *ctx,
                    const struct pipe_framebuffer_state *key,
                    struct panfrost_batch *batch)
{
        struct pipe_screen *pscreen = ctx->base.screen;
        struct panfrost_screen *screen = pan_screen(pscreen);
        struct panfrost_device *dev = &screen->dev;

        batch->ctx = ctx;

        batch->seqnum = ++ctx->batches.seqnum;

        util_dynarray_init(&batch->bos, NULL);

        batch->minx = batch->miny = ~0;
        batch->maxx = batch->maxy = 0;

        util_copy_framebuffer_state(&batch->key, key);
        batch->resources = _mesa_set_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);

        /* Preallocate the main pool, since every batch has at least one job
         * structure, so the pool will always be used */
        panfrost_pool_init(&batch->pool, NULL, dev, 0, 65536, "Batch pool", true, true);

        /* Don't preallocate the invisible pool, since not every batch will use
         * the pre-allocation, particularly if the varyings are larger than the
         * preallocation and a reallocation would be needed afterwards anyway. */
        panfrost_pool_init(&batch->invisible_pool, NULL, dev,
                        PAN_BO_INVISIBLE, 65536, "Varyings", false, true);

        for (unsigned i = 0; i < batch->key.nr_cbufs; ++i)
                panfrost_batch_add_surface(batch, batch->key.cbufs[i]);

        panfrost_batch_add_surface(batch, batch->key.zsbuf);

        screen->vtbl.init_batch(batch);
}

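/* Tear down a batch once it has been submitted (or discarded): drop the BO
 * and resource references it holds, remove it from the writer table, destroy
 * its pools, and clear the slot so it can be reused. */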
static void
panfrost_batch_cleanup(struct panfrost_context *ctx, struct panfrost_batch *batch)
{
        struct panfrost_device *dev = pan_device(ctx->base.screen);

        assert(batch->seqnum);

        if (ctx->batch == batch)
                ctx->batch = NULL;

        unsigned batch_idx = panfrost_batch_idx(batch);

        pan_bo_access *flags = util_dynarray_begin(&batch->bos);
        unsigned end_bo = util_dynarray_num_elements(&batch->bos, pan_bo_access);

        for (int i = 0; i < end_bo; ++i) {
                if (!flags[i])
                        continue;

                struct panfrost_bo *bo = pan_lookup_bo(dev, i);
                panfrost_bo_unreference(bo);
        }

        set_foreach(batch->resources, entry) {
                struct panfrost_resource *rsrc = (void *) entry->key;

                if (_mesa_hash_table_search(ctx->writers, rsrc)) {
                        _mesa_hash_table_remove_key(ctx->writers, rsrc);
                        rsrc->track.nr_writers--;
                }

                rsrc->track.nr_users--;

                pipe_resource_reference((struct pipe_resource **) &rsrc, NULL);
        }

        _mesa_set_destroy(batch->resources, NULL);
        panfrost_pool_cleanup(&batch->pool);
        panfrost_pool_cleanup(&batch->invisible_pool);

        util_unreference_framebuffer_state(&batch->key);

        util_dynarray_fini(&batch->bos);

        memset(batch, 0, sizeof(*batch));
        BITSET_CLEAR(ctx->batches.active, batch_idx);
}

static void
panfrost_batch_submit(struct panfrost_context *ctx,
                      struct panfrost_batch *batch);

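/* Look up the batch matching the given framebuffer state. If no active batch
 * matches, the least-recently-used slot (smallest seqnum) is evicted,
 * submitting its batch if necessary, and re-initialized for the new key. */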
static struct panfrost_batch *
panfrost_get_batch(struct panfrost_context *ctx,
                   const struct pipe_framebuffer_state *key)
{
        struct panfrost_batch *batch = NULL;

        for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) {
                if (ctx->batches.slots[i].seqnum &&
                    util_framebuffer_state_equal(&ctx->batches.slots[i].key, key)) {
                        /* We found a match, increase the seqnum for the LRU
                         * eviction logic.
                         */
                        ctx->batches.slots[i].seqnum = ++ctx->batches.seqnum;
                        return &ctx->batches.slots[i];
                }

                if (!batch || batch->seqnum > ctx->batches.slots[i].seqnum)
                        batch = &ctx->batches.slots[i];
        }

        assert(batch);

        /* The selected slot is in use, so we need to flush the batch occupying it */
        if (batch->seqnum)
                panfrost_batch_submit(ctx, batch);

        panfrost_batch_init(ctx, key, batch);

        unsigned batch_idx = panfrost_batch_idx(batch);
        BITSET_SET(ctx->batches.active, batch_idx);

        return batch;
}

/* Get the job corresponding to the FBO we're currently rendering into */

struct panfrost_batch *
panfrost_get_batch_for_fbo(struct panfrost_context *ctx)
{
        /* If we already began rendering, use that */

        if (ctx->batch) {
                assert(util_framebuffer_state_equal(&ctx->batch->key,
                                                    &ctx->pipe_framebuffer));
                return ctx->batch;
        }

        /* If not, look up the job */
        struct panfrost_batch *batch = panfrost_get_batch(ctx,
                                                          &ctx->pipe_framebuffer);

        /* Set this job as the current FBO job. Will be reset when updating the
         * FB state and when submitting or releasing a job.
         */
        ctx->batch = batch;
        panfrost_dirty_state_all(ctx);
        return batch;
}

struct panfrost_batch *
panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx, const char *reason)
{
        struct panfrost_batch *batch;

        batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
        panfrost_dirty_state_all(ctx);

        /* We only need to submit and grab a fresh batch if a draw/clear is
         * already queued on this one. Otherwise the batch is still empty and
         * may be reused as-is. */

        if (batch->scoreboard.first_job) {
                perf_debug_ctx(ctx, "Flushing the current FBO due to: %s", reason);
                panfrost_batch_submit(ctx, batch);
                batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
        }

        ctx->batch = batch;
        return batch;
}

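/* Track a resource access on this batch. Writing a resource flushes every
 * other batch that uses it; reading a resource that a different batch writes
 * flushes those users too, so access ordering is preserved across batches. */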
static void
panfrost_batch_update_access(struct panfrost_batch *batch,
                             struct panfrost_resource *rsrc, bool writes)
{
        struct panfrost_context *ctx = batch->ctx;
        uint32_t batch_idx = panfrost_batch_idx(batch);
        struct hash_entry *entry = _mesa_hash_table_search(ctx->writers, rsrc);
        struct panfrost_batch *writer = entry ? entry->data : NULL;
        bool found = false;

        _mesa_set_search_or_add(batch->resources, rsrc, &found);

        if (!found) {
                /* Cache number of batches accessing a resource */
                rsrc->track.nr_users++;

                /* Reference the resource on the batch */
                pipe_reference(NULL, &rsrc->base.reference);
        }

        /* Flush users if required */
        if (writes || ((writer != NULL) && (writer != batch))) {
                unsigned i;
                foreach_batch(ctx, i) {
                        struct panfrost_batch *batch = &ctx->batches.slots[i];

                        /* Skip the entry if this is our batch. */
                        if (i == batch_idx)
                                continue;

                        /* Submit if it's a user */
                        if (_mesa_set_search(batch->resources, rsrc))
                                panfrost_batch_submit(ctx, batch);
                }
        }

        if (writes) {
                _mesa_hash_table_insert(ctx->writers, rsrc, batch);
                rsrc->track.nr_writers++;
        }
}

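/* Per-BO access flags are kept in a dynarray indexed by GEM handle, so
 * lookups are O(1). The array grows (zero-filled) on demand when a handle
 * beyond the current size is seen. */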
static pan_bo_access *
panfrost_batch_get_bo_access(struct panfrost_batch *batch, unsigned handle)
{
        unsigned size = util_dynarray_num_elements(&batch->bos, pan_bo_access);

        if (handle >= size) {
                unsigned grow = handle + 1 - size;

                memset(util_dynarray_grow(&batch->bos, pan_bo_access, grow),
                       0, grow * sizeof(pan_bo_access));
        }

        return util_dynarray_element(&batch->bos, pan_bo_access, handle);
}

static void
panfrost_batch_add_bo_old(struct panfrost_batch *batch,
                struct panfrost_bo *bo, uint32_t flags)
{
        if (!bo)
                return;

        pan_bo_access *entry =
                panfrost_batch_get_bo_access(batch, bo->gem_handle);
        pan_bo_access old_flags = *entry;

        if (!old_flags) {
                batch->num_bos++;
                panfrost_bo_reference(bo);
        }

        if (old_flags == flags)
                return;

        flags |= old_flags;
        *entry = flags;
}

static uint32_t
panfrost_access_for_stage(enum pipe_shader_type stage)
{
        return (stage == PIPE_SHADER_FRAGMENT) ?
                PAN_BO_ACCESS_FRAGMENT : PAN_BO_ACCESS_VERTEX_TILER;
}

void
panfrost_batch_add_bo(struct panfrost_batch *batch,
                struct panfrost_bo *bo, enum pipe_shader_type stage)
{
        panfrost_batch_add_bo_old(batch, bo, PAN_BO_ACCESS_READ |
                        panfrost_access_for_stage(stage));
}

void
panfrost_batch_read_rsrc(struct panfrost_batch *batch,
                         struct panfrost_resource *rsrc,
                         enum pipe_shader_type stage)
{
        uint32_t access = PAN_BO_ACCESS_READ |
                panfrost_access_for_stage(stage);

        panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access);

        if (rsrc->image.crc.bo)
                panfrost_batch_add_bo_old(batch, rsrc->image.crc.bo, access);

        if (rsrc->separate_stencil)
                panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->image.data.bo, access);

        panfrost_batch_update_access(batch, rsrc, false);
}

void
panfrost_batch_write_rsrc(struct panfrost_batch *batch,
                          struct panfrost_resource *rsrc,
                          enum pipe_shader_type stage)
{
        uint32_t access = PAN_BO_ACCESS_WRITE |
                panfrost_access_for_stage(stage);

        panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access);

        if (rsrc->image.crc.bo)
                panfrost_batch_add_bo_old(batch, rsrc->image.crc.bo, access);

        if (rsrc->separate_stencil)
                panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->image.data.bo, access);

        panfrost_batch_update_access(batch, rsrc, true);
}

void
panfrost_resource_swap_bo(struct panfrost_context *ctx,
                          struct panfrost_resource *rsrc,
                          struct panfrost_bo *newbo)
{
        /* Any batch writing this resource is writing to the old BO, not the
         * new BO. After swapping the resource's backing BO, there will be no
         * writers of the updated resource. Existing writers still hold a
         * reference to the old BO for reference counting.
         */
        struct hash_entry *writer = _mesa_hash_table_search(ctx->writers, rsrc);
        if (writer) {
                _mesa_hash_table_remove(ctx->writers, writer);
                rsrc->track.nr_writers--;
        }

        /* Likewise, any batch reading this resource is reading the old BO, and
         * after swapping will not be reading this resource.
         */
        unsigned i;
        foreach_batch(ctx, i) {
                struct panfrost_batch *batch = &ctx->batches.slots[i];
                struct set_entry *ent = _mesa_set_search(batch->resources, rsrc);

                if (!ent)
                        continue;

                _mesa_set_remove(batch->resources, ent);
                rsrc->track.nr_users--;
        }

        /* Swap the pointers, dropping a reference to the old BO which is no
         * longer referenced from the resource.
         */
        panfrost_bo_unreference(rsrc->image.data.bo);
        rsrc->image.data.bo = newbo;
}

struct panfrost_bo *
panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
                         uint32_t create_flags, enum pipe_shader_type stage,
                         const char *label)
{
        struct panfrost_bo *bo;

        bo = panfrost_bo_create(pan_device(batch->ctx->base.screen), size,
                                create_flags, label);
        panfrost_batch_add_bo(batch, bo, stage);

        /* panfrost_batch_add_bo() has retained a reference and
         * panfrost_bo_create() initializes the refcount to 1, so let's
         * unreference the BO here so it gets released when the batch is
         * destroyed (unless it's retained by someone else in the meantime).
         */
        panfrost_bo_unreference(bo);
        return bo;
}

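/* The scratchpad (thread local storage) is allocated lazily, once per batch,
 * and shared between the vertex/tiler and fragment parts of the batch. The
 * assert below relies on callers never requesting a size larger than
 * whatever was first allocated for this batch. */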
struct panfrost_bo *
panfrost_batch_get_scratchpad(struct panfrost_batch *batch,
                unsigned size_per_thread,
                unsigned thread_tls_alloc,
                unsigned core_id_range)
{
        unsigned size = panfrost_get_total_stack_size(size_per_thread,
                        thread_tls_alloc,
                        core_id_range);

        if (batch->scratchpad) {
                assert(batch->scratchpad->size >= size);
        } else {
                batch->scratchpad = panfrost_batch_create_bo(batch, size,
                                             PAN_BO_INVISIBLE,
                                             PIPE_SHADER_VERTEX,
                                             "Thread local storage");

                panfrost_batch_add_bo(batch, batch->scratchpad,
                                PIPE_SHADER_FRAGMENT);
        }

        return batch->scratchpad;
}

struct panfrost_bo *
panfrost_batch_get_shared_memory(struct panfrost_batch *batch,
                unsigned size,
                unsigned workgroup_count)
{
        if (batch->shared_memory) {
                assert(batch->shared_memory->size >= size);
        } else {
                batch->shared_memory = panfrost_batch_create_bo(batch, size,
                                             PAN_BO_INVISIBLE,
                                             PIPE_SHADER_VERTEX,
                                             "Workgroup shared memory");
        }

        return batch->shared_memory;
}

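/* Translate a batch's Gallium framebuffer state into the pan_fb_info consumed
 * by the common per-GPU code: one image view per render target plus optional
 * depth and stencil views, along with per-surface clear, discard and preload
 * decisions. */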
static void
panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
                          struct pan_fb_info *fb,
                          struct pan_image_view *rts,
                          struct pan_image_view *zs,
                          struct pan_image_view *s,
                          bool reserve)
{
        memset(fb, 0, sizeof(*fb));
        memset(rts, 0, sizeof(*rts) * 8);
        memset(zs, 0, sizeof(*zs));
        memset(s, 0, sizeof(*s));

        fb->width = batch->key.width;
        fb->height = batch->key.height;
        fb->extent.minx = batch->minx;
        fb->extent.miny = batch->miny;
        fb->extent.maxx = batch->maxx - 1;
        fb->extent.maxy = batch->maxy - 1;
        fb->nr_samples = util_framebuffer_get_num_samples(&batch->key);
        fb->rt_count = batch->key.nr_cbufs;
        fb->sprite_coord_origin = pan_tristate_get(batch->sprite_coord_origin);
        fb->first_provoking_vertex = pan_tristate_get(batch->first_provoking_vertex);

        static const unsigned char id_swz[] = {
                PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
        };

        for (unsigned i = 0; i < fb->rt_count; i++) {
                struct pipe_surface *surf = batch->key.cbufs[i];

                if (!surf)
                        continue;

                struct panfrost_resource *prsrc = pan_resource(surf->texture);
                unsigned mask = PIPE_CLEAR_COLOR0 << i;

                if (batch->clear & mask) {
                        fb->rts[i].clear = true;
                        memcpy(fb->rts[i].clear_value, batch->clear_color[i],
                               sizeof((fb->rts[i].clear_value)));
                }

                fb->rts[i].discard = !reserve && !(batch->resolve & mask);

                rts[i].format = surf->format;
                rts[i].dim = MALI_TEXTURE_DIMENSION_2D;
                rts[i].last_level = rts[i].first_level = surf->u.tex.level;
                rts[i].first_layer = surf->u.tex.first_layer;
                rts[i].last_layer = surf->u.tex.last_layer;
                rts[i].image = &prsrc->image;
                rts[i].nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1);
                memcpy(rts[i].swizzle, id_swz, sizeof(rts[i].swizzle));
                fb->rts[i].crc_valid = &prsrc->valid.crc;
                fb->rts[i].view = &rts[i];

                /* Preload if the RT is read or updated */
                if (!(batch->clear & mask) &&
                    ((batch->read & mask) ||
                     ((batch->draws & mask) &&
                      BITSET_TEST(prsrc->valid.data, fb->rts[i].view->first_level))))
                        fb->rts[i].preload = true;
        }

        const struct pan_image_view *s_view = NULL, *z_view = NULL;
        struct panfrost_resource *z_rsrc = NULL, *s_rsrc = NULL;

        if (batch->key.zsbuf) {
                struct pipe_surface *surf = batch->key.zsbuf;
                z_rsrc = pan_resource(surf->texture);

                zs->format = surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ?
                             PIPE_FORMAT_Z32_FLOAT : surf->format;
                zs->dim = MALI_TEXTURE_DIMENSION_2D;
                zs->last_level = zs->first_level = surf->u.tex.level;
                zs->first_layer = surf->u.tex.first_layer;
                zs->last_layer = surf->u.tex.last_layer;
                zs->image = &z_rsrc->image;
                zs->nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1);
                memcpy(zs->swizzle, id_swz, sizeof(zs->swizzle));
                fb->zs.view.zs = zs;
                z_view = zs;
                if (util_format_is_depth_and_stencil(zs->format)) {
                        s_view = zs;
                        s_rsrc = z_rsrc;
                }

                if (z_rsrc->separate_stencil) {
                        s_rsrc = z_rsrc->separate_stencil;
                        s->format = PIPE_FORMAT_S8_UINT;
                        s->dim = MALI_TEXTURE_DIMENSION_2D;
                        s->last_level = s->first_level = surf->u.tex.level;
                        s->first_layer = surf->u.tex.first_layer;
                        s->last_layer = surf->u.tex.last_layer;
                        s->image = &s_rsrc->image;
                        s->nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1);
                        memcpy(s->swizzle, id_swz, sizeof(s->swizzle));
                        fb->zs.view.s = s;
                        s_view = s;
                }
        }

        if (batch->clear & PIPE_CLEAR_DEPTH) {
                fb->zs.clear.z = true;
                fb->zs.clear_value.depth = batch->clear_depth;
        }

        if (batch->clear & PIPE_CLEAR_STENCIL) {
                fb->zs.clear.s = true;
                fb->zs.clear_value.stencil = batch->clear_stencil;
        }

        fb->zs.discard.z = !reserve && !(batch->resolve & PIPE_CLEAR_DEPTH);
        fb->zs.discard.s = !reserve && !(batch->resolve & PIPE_CLEAR_STENCIL);

        if (!fb->zs.clear.z &&
            ((batch->read & PIPE_CLEAR_DEPTH) ||
             ((batch->draws & PIPE_CLEAR_DEPTH) &&
              z_rsrc && BITSET_TEST(z_rsrc->valid.data, z_view->first_level))))
                fb->zs.preload.z = true;

        if (!fb->zs.clear.s &&
            ((batch->read & PIPE_CLEAR_STENCIL) ||
             ((batch->draws & PIPE_CLEAR_STENCIL) &&
              s_rsrc && BITSET_TEST(s_rsrc->valid.data, s_view->first_level))))
                fb->zs.preload.s = true;

        /* Preserve both components if we have a combined ZS view and
         * one component needs to be preserved.
         */
        if (s_view == z_view && fb->zs.discard.z != fb->zs.discard.s) {
                bool valid = BITSET_TEST(z_rsrc->valid.data, z_view->first_level);

                fb->zs.discard.z = false;
                fb->zs.discard.s = false;
                fb->zs.preload.z = !fb->zs.clear.z && valid;
                fb->zs.preload.s = !fb->zs.clear.s && valid;
        }
}

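/* Wrap DRM_IOCTL_PANFROST_SUBMIT for a single job chain: gather the handles
 * of every BO the batch references (including the pool BOs and, when needed,
 * the device-wide tiler heap and sample positions BOs), wire up the in/out
 * sync objects, and optionally decode or sync on the job for debugging. */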
static int
panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
                            mali_ptr first_job_desc,
                            uint32_t reqs,
                            uint32_t in_sync,
                            uint32_t out_sync)
{
        struct panfrost_context *ctx = batch->ctx;
        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_device *dev = pan_device(gallium->screen);
        struct drm_panfrost_submit submit = {0,};
        uint32_t *bo_handles;
        int ret;

        /* If we trace, we always need a syncobj, so use one of our own if we
         * weren't given one. That way we still have something to wait on,
         * without double-freeing a syncobj the caller owns. */

        if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
                out_sync = ctx->syncobj;

        submit.out_sync = out_sync;
        submit.jc = first_job_desc;
        submit.requirements = reqs;
        if (in_sync) {
                submit.in_syncs = (u64)(uintptr_t)(&in_sync);
                submit.in_sync_count = 1;
        }

        bo_handles = calloc(panfrost_pool_num_bos(&batch->pool) +
                            panfrost_pool_num_bos(&batch->invisible_pool) +
                            batch->num_bos + 2,
                            sizeof(*bo_handles));
        assert(bo_handles);

        pan_bo_access *flags = util_dynarray_begin(&batch->bos);
        unsigned end_bo = util_dynarray_num_elements(&batch->bos, pan_bo_access);

        for (int i = 0; i < end_bo; ++i) {
                if (!flags[i])
                        continue;

                assert(submit.bo_handle_count < batch->num_bos);
                bo_handles[submit.bo_handle_count++] = i;

                /* Update the BO access flags so that panfrost_bo_wait() knows
                 * about all pending accesses.
                 * We only keep the READ/WRITE info since this is all the BO
                 * wait logic cares about.
                 * We also preserve existing flags as this batch might not
                 * be the first one to access the BO.
                 */
                struct panfrost_bo *bo = pan_lookup_bo(dev, i);

                bo->gpu_access |= flags[i] & (PAN_BO_ACCESS_RW);
        }

        panfrost_pool_get_bo_handles(&batch->pool, bo_handles + submit.bo_handle_count);
        submit.bo_handle_count += panfrost_pool_num_bos(&batch->pool);
        panfrost_pool_get_bo_handles(&batch->invisible_pool, bo_handles + submit.bo_handle_count);
        submit.bo_handle_count += panfrost_pool_num_bos(&batch->invisible_pool);

        /* Add the tiler heap to the list of accessed BOs if the batch has at
         * least one tiler job. The tiler heap is written by tiler jobs and read
         * by fragment jobs (the polygon list comes from this heap).
         */
        if (batch->scoreboard.first_tiler)
                bo_handles[submit.bo_handle_count++] = dev->tiler_heap->gem_handle;

        /* Always used on Bifrost, occasionally used on Midgard */
        bo_handles[submit.bo_handle_count++] = dev->sample_positions->gem_handle;

        submit.bo_handles = (u64) (uintptr_t) bo_handles;
        if (ctx->is_noop)
                ret = 0;
        else
                ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
        free(bo_handles);

        if (ret)
                return errno;

        /* Trace the job if we're doing that */
        if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
                /* Wait so we can get errors reported back */
                drmSyncobjWait(dev->fd, &out_sync, 1,
                               INT64_MAX, 0, NULL);

                if (dev->debug & PAN_DBG_TRACE)
                        pandecode_jc(submit.jc, dev->gpu_id);

                if (dev->debug & PAN_DBG_DUMP)
                        pandecode_dump_mappings();

                /* Jobs won't be complete if blackhole rendering, that's ok */
                if (!ctx->is_noop && dev->debug & PAN_DBG_SYNC)
                        pandecode_abort_on_fault(submit.jc, dev->gpu_id);
        }

        return 0;
}

static bool
panfrost_has_fragment_job(struct panfrost_batch *batch)
{
        return batch->scoreboard.first_tiler || batch->clear;
}

/* Submit both vertex/tiler and fragment jobs for a batch, possibly with an
 * out-sync corresponding to the later of the two (since there will be an
 * implicit dependency between them) */

static int
panfrost_batch_submit_jobs(struct panfrost_batch *batch,
                           const struct pan_fb_info *fb,
                           uint32_t in_sync, uint32_t out_sync)
{
        struct pipe_screen *pscreen = batch->ctx->base.screen;
        struct panfrost_screen *screen = pan_screen(pscreen);
        struct panfrost_device *dev = pan_device(pscreen);
        bool has_draws = batch->scoreboard.first_job;
        bool has_tiler = batch->scoreboard.first_tiler;
        bool has_frag = panfrost_has_fragment_job(batch);
        int ret = 0;

        /* Take the submit lock to make sure no tiler jobs from other contexts
         * are inserted between our tiler and fragment jobs; failing to do that
         * might result in tiler heap corruption.
         */
        if (has_tiler)
                pthread_mutex_lock(&dev->submit_lock);

        if (has_draws) {
                ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job,
                                                  0, in_sync, has_frag ? 0 : out_sync);

                if (ret)
                        goto done;
        }

        if (has_frag) {
                mali_ptr fragjob = screen->vtbl.emit_fragment_job(batch, fb);
                ret = panfrost_batch_submit_ioctl(batch, fragjob,
                                                  PANFROST_JD_REQ_FS, 0,
                                                  out_sync);
                if (ret)
                        goto done;
        }

done:
        if (has_tiler)
                pthread_mutex_unlock(&dev->submit_lock);

        return ret;
}

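/* If the first colour attachment tracks its damage region with a tile map,
 * upload the map and record its stride so the framebuffer descriptor can
 * reference it (the map marks which tiles fall inside the damage region). */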
static void
panfrost_emit_tile_map(struct panfrost_batch *batch, struct pan_fb_info *fb)
{
        if (batch->key.nr_cbufs < 1 || !batch->key.cbufs[0])
                return;

        struct pipe_surface *surf = batch->key.cbufs[0];
        struct panfrost_resource *pres = surf ? pan_resource(surf->texture) : NULL;

        if (pres && pres->damage.tile_map.enable) {
                fb->tile_map.base =
                        pan_pool_upload_aligned(&batch->pool.base,
                                                pres->damage.tile_map.data,
                                                pres->damage.tile_map.size,
                                                64);
                fb->tile_map.stride = pres->damage.tile_map.stride;
        }
}

static void
panfrost_batch_submit(struct panfrost_context *ctx,
                      struct panfrost_batch *batch)
{
        struct pipe_screen *pscreen = ctx->base.screen;
        struct panfrost_screen *screen = pan_screen(pscreen);
        int ret;

        /* Nothing to do! */
        if (!batch->scoreboard.first_job && !batch->clear)
                goto out;

        if (batch->key.zsbuf && panfrost_has_fragment_job(batch)) {
                struct pipe_surface *surf = batch->key.zsbuf;
                struct panfrost_resource *z_rsrc = pan_resource(surf->texture);

                /* Shared depth/stencil resources are not supported, and would
                 * break this optimisation. */
                assert(!(z_rsrc->base.bind & PAN_BIND_SHARED_MASK));

                if (batch->clear & PIPE_CLEAR_STENCIL) {
                        z_rsrc->stencil_value = batch->clear_stencil;
                        z_rsrc->constant_stencil = true;
                } else if (z_rsrc->constant_stencil) {
                        batch->clear_stencil = z_rsrc->stencil_value;
                        batch->clear |= PIPE_CLEAR_STENCIL;
                }

                if (batch->draws & PIPE_CLEAR_STENCIL)
                        z_rsrc->constant_stencil = false;
        }

        struct pan_fb_info fb;
        struct pan_image_view rts[8], zs, s;

        panfrost_batch_to_fb_info(batch, &fb, rts, &zs, &s, false);

        screen->vtbl.preload(batch, &fb);
        screen->vtbl.init_polygon_list(batch);

        /* Now that all draws are in, we can finally prepare the
         * FBD for the batch (if there is one). */

        screen->vtbl.emit_tls(batch);
        panfrost_emit_tile_map(batch, &fb);

        if (batch->scoreboard.first_tiler || batch->clear)
                screen->vtbl.emit_fbd(batch, &fb);

        ret = panfrost_batch_submit_jobs(batch, &fb, 0, ctx->syncobj);

        if (ret)
                fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);

        /* We must reset the damage info of our render targets here even
         * though a damage reset normally happens when the DRI layer swaps
         * buffers. That's because there can be implicit flushes the GL
         * app is not aware of, and those might impact the damage region: if
         * part of the damaged portion is drawn during those implicit flushes,
         * you have to reload those areas before the next draws are pushed, and
         * since the driver can't easily know what's been modified by the draws
         * it flushed, the easiest solution is to reload everything.
         */
        for (unsigned i = 0; i < batch->key.nr_cbufs; i++) {
                if (!batch->key.cbufs[i])
                        continue;

                panfrost_resource_set_damage_region(ctx->base.screen,
                                                    batch->key.cbufs[i]->texture,
                                                    0, NULL);
        }

out:
        panfrost_batch_cleanup(ctx, batch);
}

/* Submit all batches */

void
panfrost_flush_all_batches(struct panfrost_context *ctx, const char *reason)
{
        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
        panfrost_batch_submit(ctx, batch);

        for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) {
                if (ctx->batches.slots[i].seqnum) {
                        if (reason)
                                perf_debug_ctx(ctx, "Flushing everything due to: %s", reason);

                        panfrost_batch_submit(ctx, &ctx->batches.slots[i]);
                }
        }
}

void
panfrost_flush_writer(struct panfrost_context *ctx,
                      struct panfrost_resource *rsrc,
                      const char *reason)
{
        struct hash_entry *entry = _mesa_hash_table_search(ctx->writers, rsrc);

        if (entry) {
                perf_debug_ctx(ctx, "Flushing writer due to: %s", reason);
                panfrost_batch_submit(ctx, entry->data);
        }
}

void
panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx,
                                      struct panfrost_resource *rsrc,
                                      const char *reason)
{
        unsigned i;
        foreach_batch(ctx, i) {
                struct panfrost_batch *batch = &ctx->batches.slots[i];

                if (!_mesa_set_search(batch->resources, rsrc))
                        continue;

                perf_debug_ctx(ctx, "Flushing user due to: %s", reason);
                panfrost_batch_submit(ctx, batch);
        }
}

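/* Size the batch's thread local storage to cover the largest stack needed by
 * any shader stage currently bound on the context. */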
void
panfrost_batch_adjust_stack_size(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;

        for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
                struct panfrost_shader_state *ss;

                ss = panfrost_get_shader_state(ctx, i);
                if (!ss)
                        continue;

                batch->stack_size = MAX2(batch->stack_size, ss->info.tls_size);
        }
}

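/* Record a clear on the batch: pack the clear colours per render-target
 * format, stash the depth/stencil clear values, and mark the cleared buffers
 * as both cleared and resolved so they are written back at the end of the
 * batch. */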
void
panfrost_batch_clear(struct panfrost_batch *batch,
                     unsigned buffers,
                     const union pipe_color_union *color,
                     double depth, unsigned stencil)
{
        struct panfrost_context *ctx = batch->ctx;

        if (buffers & PIPE_CLEAR_COLOR) {
                for (unsigned i = 0; i < ctx->pipe_framebuffer.nr_cbufs; ++i) {
                        if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
                                continue;

                        enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
                        pan_pack_color(batch->clear_color[i], color, format, false);
                }
        }

        if (buffers & PIPE_CLEAR_DEPTH) {
                batch->clear_depth = depth;
        }

        if (buffers & PIPE_CLEAR_STENCIL) {
                batch->clear_stencil = stencil;
        }

        batch->clear |= buffers;
        batch->resolve |= buffers;

        /* Clearing affects the entire framebuffer (by definition -- this is
         * the Gallium clear callback, which clears the whole framebuffer. If
         * the scissor test were enabled from the GL side, the gallium frontend
         * would emit a quad instead and we wouldn't go down this code path) */

        panfrost_batch_union_scissor(batch, 0, 0,
                                     ctx->pipe_framebuffer.width,
                                     ctx->pipe_framebuffer.height);
}

/* Given a new bounding rectangle (scissor), let the job cover the union of the
 * new and old bounding rectangles */

void
panfrost_batch_union_scissor(struct panfrost_batch *batch,
                             unsigned minx, unsigned miny,
                             unsigned maxx, unsigned maxy)
{
        batch->minx = MIN2(batch->minx, minx);
        batch->miny = MIN2(batch->miny, miny);
        batch->maxx = MAX2(batch->maxx, maxx);
        batch->maxy = MAX2(batch->maxy, maxy);
}

/**
 * Checks if rasterization should be skipped. If not, a TILER job must be
 * created for each draw, or the IDVS flow must be used.
 *
 * As a special case, if there is no vertex shader, no primitives are generated,
 * meaning the whole pipeline (including rasterization) should be skipped.
 */
bool
panfrost_batch_skip_rasterization(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;
        struct pipe_rasterizer_state *rast = (void *) ctx->rasterizer;

        return (rast->rasterizer_discard ||
                batch->scissor_culls_everything ||
                !batch->rsd[PIPE_SHADER_VERTEX]);
}