• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/format/u_format.h"
25 #include "util/macros.h"
26 #include "v3d_context.h"
27 #include "broadcom/common/v3d_macros.h"
28 #include "broadcom/common/v3d_tiling.h"
29 #include "broadcom/common/v3d_util.h"
30 #include "broadcom/cle/v3dx_pack.h"
31 
/* Mask of all four color-attachment clear bits. */
#define PIPE_CLEAR_COLOR_BUFFERS (PIPE_CLEAR_COLOR0 |                   \
                                  PIPE_CLEAR_COLOR1 |                   \
                                  PIPE_CLEAR_COLOR2 |                   \
                                  PIPE_CLEAR_COLOR3)

/* Bit index of the first color buffer within the PIPE_CLEAR_* bitfield. */
#define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1)
38 
/*
 * Emits a LOAD_TILE_BUFFER_GENERAL packet loading one aspect (@buffer:
 * a color RT, Z, STENCIL or ZSTENCIL) of @psurf at @layer into the tile
 * buffer, then clears @pipe_bit from *@loads_pending.
 */
static void
load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer,
             int layer, uint32_t pipe_bit, uint32_t *loads_pending)
{
        struct v3d_surface *surf = v3d_surface(psurf);
        bool separate_stencil = surf->separate_stencil && buffer == STENCIL;
        if (separate_stencil) {
                /* Stencil lives in its own resource: redirect the load to
                 * the separate stencil surface.
                 */
                psurf = surf->separate_stencil;
                surf = v3d_surface(psurf);
        }

        struct v3d_resource *rsc = v3d_resource(psurf->texture);

        uint32_t layer_offset =
                v3d_layer_offset(&rsc->base, psurf->u.tex.level,
                                 psurf->u.tex.first_layer + layer);
        cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
                load.buffer_to_load = buffer;
                load.address = cl_address(rsc->bo, layer_offset);

                load.memory_format = surf->tiling;
                /* A separate-stencil surface is always raw S8 regardless of
                 * the combined-ZS format of the original surface.
                 */
                if (separate_stencil)
                        load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
                else
                        load.input_image_format = surf->format;
                load.r_b_swap = surf->swap_rb;
                load.force_alpha_1 = util_format_has_alpha1(psurf->format);
                /* The height/stride field only applies to UIF (padded height
                 * in UIF blocks) and raster (byte stride) tilings.
                 */
                if (surf->tiling == V3D_TILING_UIF_NO_XOR ||
                    surf->tiling == V3D_TILING_UIF_XOR) {
                        load.height_in_ub_or_stride =
                                surf->padded_height_of_output_image_in_uif_blocks;
                } else if (surf->tiling == V3D_TILING_RASTER) {
                        struct v3d_resource_slice *slice =
                                &rsc->slices[psurf->u.tex.level];
                        load.height_in_ub_or_stride = slice->stride;
                }

                /* MSAA surfaces load every sample; single-sampled surfaces
                 * load only sample 0.
                 */
                if (psurf->texture->nr_samples > 1)
                        load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
                else
                        load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;

        }

        *loads_pending &= ~pipe_bit;
}
85 
/*
 * Emits a STORE_TILE_BUFFER_GENERAL packet writing one aspect (@buffer)
 * of the tile buffer out to @psurf at @layer, then clears @pipe_bit from
 * *@stores_pending.  @resolve_4x requests a 4x-MSAA-to-single-sample
 * resolve on the way out (only valid for blit jobs, see the assert).
 *
 * NOTE(review): @general_color_clear is not referenced in this body —
 * presumably kept for signature parity with older HW paths; confirm.
 */
static void
store_general(struct v3d_job *job,
              struct v3d_cl *cl, struct pipe_surface *psurf,
              int layer, int buffer, int pipe_bit,
              uint32_t *stores_pending, bool general_color_clear,
              bool resolve_4x)
{
        struct v3d_surface *surf = v3d_surface(psurf);
        bool separate_stencil = surf->separate_stencil && buffer == STENCIL;
        if (separate_stencil) {
                /* Redirect the store to the separate stencil resource. */
                psurf = surf->separate_stencil;
                surf = v3d_surface(psurf);
        }

        *stores_pending &= ~pipe_bit;

        struct v3d_resource *rsc = v3d_resource(psurf->texture);

        /* Record that the GPU writes this resource, so later accesses know
         * they must synchronize against this job.
         */
        rsc->writes++;
        rsc->graphics_written = true;

        uint32_t layer_offset =
                v3d_layer_offset(&rsc->base, psurf->u.tex.level,
                                 psurf->u.tex.first_layer + layer);
        cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
                store.buffer_to_store = buffer;
                store.address = cl_address(rsc->bo, layer_offset);

                /* Per-buffer clear-on-store is not used here; see the
                 * GFXH-1461/GFXH-1689 notes in v3d_rcl_emit_stores().
                 */
                store.clear_buffer_being_stored = false;

                if (separate_stencil)
                        store.output_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
                else
                        store.output_image_format = surf->format;

                store.r_b_swap = surf->swap_rb;
                store.memory_format = surf->tiling;

                /* The height/stride field only applies to UIF and raster
                 * tilings, mirroring load_general().
                 */
                if (surf->tiling == V3D_TILING_UIF_NO_XOR ||
                    surf->tiling == V3D_TILING_UIF_XOR) {
                        store.height_in_ub_or_stride =
                                surf->padded_height_of_output_image_in_uif_blocks;
                } else if (surf->tiling == V3D_TILING_RASTER) {
                        struct v3d_resource_slice *slice =
                                &rsc->slices[psurf->u.tex.level];
                        store.height_in_ub_or_stride = slice->stride;
                }

                assert(!resolve_4x || job->bbuf);
                if (psurf->texture->nr_samples > 1)
                        store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
                else if (resolve_4x && job->bbuf->texture->nr_samples > 1)
                        store.decimate_mode = V3D_DECIMATE_MODE_4X;
                else
                        store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
        }
}
143 
144 static int
zs_buffer_from_pipe_bits(int pipe_clear_bits)145 zs_buffer_from_pipe_bits(int pipe_clear_bits)
146 {
147         switch (pipe_clear_bits & PIPE_CLEAR_DEPTHSTENCIL) {
148         case PIPE_CLEAR_DEPTHSTENCIL:
149                 return ZSTENCIL;
150         case PIPE_CLEAR_DEPTH:
151                 return Z;
152         case PIPE_CLEAR_STENCIL:
153                 return STENCIL;
154         default:
155                 return NONE;
156         }
157 }
158 
/*
 * Emits the per-tile LOAD packets for every buffer with a pending load
 * (or, for blit jobs, for the blit source aspects), terminated by the
 * mandatory END_OF_LOADS packet.
 */
static void
v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer)
{
        /* When blitting, no color or zs buffer is loaded; instead the blit
         * source buffer is loaded for the aspects that we are going to blit.
         */
        assert(!job->bbuf || job->load == 0);
        assert(!job->bbuf || job->nr_cbufs <= 1);

        /* For blits, the aspects to load from the source are the ones we
         * will store to the destination.
         */
        uint32_t loads_pending = job->bbuf ? job->store : job->load;

        for (int i = 0; i < job->nr_cbufs; i++) {
                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
                if (!(loads_pending & bit))
                        continue;

                struct pipe_surface *psurf = job->bbuf ? job->bbuf : job->cbufs[i];
                assert(!job->bbuf || i == 0);

                if (!psurf)
                        continue;

                load_general(cl, psurf, RENDER_TARGET_0 + i, layer,
                             bit, &loads_pending);
        }

        if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) {
                /* An early ZS clear means neither aspect is loaded. */
                assert(!job->early_zs_clear);
                struct pipe_surface *src = job->bbuf ? job->bbuf : job->zsbuf;
                struct v3d_resource *rsc = v3d_resource(src->texture);

                /* With separate stencil, the stencil aspect needs its own
                 * load from its own resource.
                 */
                if (rsc->separate_stencil &&
                    (loads_pending & PIPE_CLEAR_STENCIL)) {
                        load_general(cl, src,
                                     STENCIL, layer,
                                     PIPE_CLEAR_STENCIL,
                                     &loads_pending);
                }

                /* Load whatever ZS aspects remain (Z, STENCIL or both). */
                if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) {
                        load_general(cl, src,
                                     zs_buffer_from_pipe_bits(loads_pending),
                                     layer,
                                     loads_pending & PIPE_CLEAR_DEPTHSTENCIL,
                                     &loads_pending);
                }
        }

        assert(!loads_pending);
        cl_emit(cl, END_OF_LOADS, end);
}
210 
/*
 * Emits the per-tile STORE packets for every buffer flagged in
 * job->store, plus the end-of-tile clear packet when the job clears any
 * buffers (see the GFXH workaround note below).
 */
static void
v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
{
        bool general_color_clear = false;
        uint32_t stores_pending = job->store;

        /* For V3D 4.1, use general stores for all TLB stores.
         *
         * For V3D 3.3, we only use general stores to do raw stores for any
         * MSAA surfaces.  These output UIF tiled images where each 4x MSAA
         * pixel is a 2x2 quad, and the format will be that of the
         * internal_type/internal_bpp, rather than the format from GL's
         * perspective.  Non-MSAA surfaces will use
         * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED.
         */
        assert(!job->bbuf || job->nr_cbufs <= 1);
        for (int i = 0; i < job->nr_cbufs; i++) {
                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
                if (!(job->store & bit))
                        continue;

                struct pipe_surface *psurf = job->cbufs[i];
                if (!psurf)
                        continue;

                /* For blit jobs (job->bbuf set) a 4x resolve is requested
                 * on the color store.
                 */
                store_general(job, cl, psurf, layer, RENDER_TARGET_0 + i, bit,
                              &stores_pending, general_color_clear, job->bbuf);
        }

        if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf) {
                assert(!job->early_zs_clear);
                struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
                if (rsc->separate_stencil) {
                        /* Depth and stencil live in separate resources, so
                         * each aspect gets its own store.
                         */
                        if (job->store & PIPE_CLEAR_DEPTH) {
                                store_general(job, cl, job->zsbuf, layer,
                                              Z, PIPE_CLEAR_DEPTH,
                                              &stores_pending,
                                              general_color_clear,
                                              false);
                        }

                        if (job->store & PIPE_CLEAR_STENCIL) {
                                store_general(job, cl, job->zsbuf, layer,
                                              STENCIL, PIPE_CLEAR_STENCIL,
                                              &stores_pending,
                                              general_color_clear,
                                              false);
                        }
                } else {
                        store_general(job, cl, job->zsbuf, layer,
                                      zs_buffer_from_pipe_bits(job->store),
                                      job->store & PIPE_CLEAR_DEPTHSTENCIL,
                                      &stores_pending, general_color_clear,
                                      false);
                }
        }


        /* If we're emitting an RCL with GL_ARB_framebuffer_no_attachments,
         * we still need to emit some sort of store.
         */
        if (!job->store) {
                cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
                        store.buffer_to_store = NONE;
                }
        }

        assert(!stores_pending);

        /* GFXH-1461/GFXH-1689: The per-buffer store command's clear
         * buffer bit is broken for depth/stencil.  In addition, the
         * clear packet's Z/S bit is broken, but the RTs bit ends up
         * clearing Z/S.
         */
        if (job->clear) {
#if V3D_VERSION == 42
                cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
                        clear.clear_z_stencil_buffer = !job->early_zs_clear;
                        clear.clear_all_render_targets = true;
                }
#endif
#if V3D_VERSION >= 71
                cl_emit(cl, CLEAR_RENDER_TARGETS, clear);
#endif

        }
}
298 
/*
 * Builds the generic per-tile command list (loads, branch to the binner's
 * tile list, stores) in the job's indirect CL, and emits the RCL packet
 * that points at it for @layer.
 */
static void
v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
{
        /* Emit the generic list in our indirect state -- the rcl will just
         * have pointers into it.
         */
        struct v3d_cl *cl = &job->indirect;
        v3d_cl_ensure_space(cl, 200, 1);
        struct v3d_cl_reloc tile_list_start = cl_get_address(cl);

        /* V3D 4.x/7.x only requires a single tile coordinates, and
         * END_OF_LOADS switches us between loading and rendering.
         */
        cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

        v3d_rcl_emit_loads(job, cl, layer);

        /* The binner starts out writing tiles assuming that the initial mode
         * is triangles, so make sure that's the case.
         */
        cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
                fmt.primitive_type = LIST_TRIANGLES;
        }

        /* PTB assumes that value to be 0, but hw will not set it. */
        cl_emit(cl, SET_INSTANCEID, set) {
           set.instance_id = 0;
        }

        cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

        v3d_rcl_emit_stores(job, cl, layer);

        cl_emit(cl, END_OF_TILE_MARKER, end);

        cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

        /* Point the RCL at the span of indirect CL we just built. */
        cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
                branch.start = tile_list_start;
                branch.end = cl_get_address(cl);
        }
}
341 
/* Note that for v71, render target cfg packets have just one field that
 * combines the internal type and clamp mode. For simplicity we keep just
 * one helper.
 *
 * Note: rt_type is in fact a "enum V3DX(Internal_Type)".
 *
 * NOTE(review): on the V3D_VERSION == 42 path, rt_type is unused and the
 * clamp mode is derived from the pipe format alone.
 */
static uint32_t
v3dX(clamp_for_format_and_type)(uint32_t rt_type,
                                enum pipe_format format)
{
#if V3D_VERSION == 42
        if (util_format_is_srgb(format)) {
                return V3D_RENDER_TARGET_CLAMP_NORM;
        } else if (util_format_is_pure_integer(format)) {
                return V3D_RENDER_TARGET_CLAMP_INT;
        } else {
                return V3D_RENDER_TARGET_CLAMP_NONE;
        }
#endif
#if V3D_VERSION >= 71
        switch (rt_type) {
        case V3D_INTERNAL_TYPE_8I:
                return V3D_RENDER_TARGET_TYPE_CLAMP_8I_CLAMPED;
        case V3D_INTERNAL_TYPE_8UI:
                return V3D_RENDER_TARGET_TYPE_CLAMP_8UI_CLAMPED;
        case V3D_INTERNAL_TYPE_8:
                return V3D_RENDER_TARGET_TYPE_CLAMP_8;
        case V3D_INTERNAL_TYPE_16I:
                return V3D_RENDER_TARGET_TYPE_CLAMP_16I_CLAMPED;
        case V3D_INTERNAL_TYPE_16UI:
                return V3D_RENDER_TARGET_TYPE_CLAMP_16UI_CLAMPED;
        case V3D_INTERNAL_TYPE_16F:
                /* 16F render targets clamp to [0,1] only when the format
                 * is sRGB.
                 */
                return util_format_is_srgb(format) ?
                        V3D_RENDER_TARGET_TYPE_CLAMP_16F_CLAMP_NORM :
                        V3D_RENDER_TARGET_TYPE_CLAMP_16F;
        case V3D_INTERNAL_TYPE_32I:
                return V3D_RENDER_TARGET_TYPE_CLAMP_32I_CLAMPED;
        case V3D_INTERNAL_TYPE_32UI:
                return V3D_RENDER_TARGET_TYPE_CLAMP_32UI_CLAMPED;
        case V3D_INTERNAL_TYPE_32F:
                return V3D_RENDER_TARGET_TYPE_CLAMP_32F;
        default:
                unreachable("Unknown internal render target type");
        }
        /* Not reached; kept to satisfy compilers that don't understand
         * unreachable().
         */
        return V3D_RENDER_TARGET_TYPE_CLAMP_INVALID;
#endif
        unreachable("Wrong V3D_VERSION");
}
391 
#if V3D_VERSION >= 71
/*
 * Fills in the internal bpp and the combined internal-type/clamp config
 * for color attachment @cbuf.  If a blit source buffer is attached, the
 * bpp is raised to cover it too.  Outputs are left untouched when the
 * attachment slot is empty.
 */
static void
v3d_setup_render_target(struct v3d_job *job,
                        int cbuf,
                        uint32_t *rt_bpp,
                        uint32_t *rt_type_clamp)
{
        struct pipe_surface *psurf = job->cbufs[cbuf];
        if (!psurf)
                return;

        struct v3d_surface *cbuf_surf = v3d_surface(psurf);

        uint32_t bpp = cbuf_surf->internal_bpp;
        if (job->bbuf)
                bpp = MAX2(bpp, v3d_surface(job->bbuf)->internal_bpp);
        *rt_bpp = bpp;

        *rt_type_clamp =
                v3dX(clamp_for_format_and_type)(cbuf_surf->internal_type,
                                                cbuf_surf->base.format);
}
#endif
412 
#if V3D_VERSION == 42
/*
 * Fills in the internal bpp, internal type, and clamp mode for color
 * attachment @cbuf.  If a blit source buffer is attached, the bpp is
 * raised to cover it too.  Outputs are left untouched when the
 * attachment slot is empty.
 */
static void
v3d_setup_render_target(struct v3d_job *job,
                        int cbuf,
                        uint32_t *rt_bpp,
                        uint32_t *rt_type,
                        uint32_t *rt_clamp)
{
        struct pipe_surface *psurf = job->cbufs[cbuf];
        if (!psurf)
                return;

        struct v3d_surface *cbuf_surf = v3d_surface(psurf);

        uint32_t bpp = cbuf_surf->internal_bpp;
        if (job->bbuf)
                bpp = MAX2(bpp, v3d_surface(job->bbuf)->internal_bpp);
        *rt_bpp = bpp;

        *rt_type = cbuf_surf->internal_type;
        *rt_clamp =
                v3dX(clamp_for_format_and_type)(cbuf_surf->internal_type,
                                                cbuf_surf->base.format);
}
#endif
435 
436 static bool
supertile_in_job_scissors(struct v3d_job * job,uint32_t x,uint32_t y,uint32_t w,uint32_t h)437 supertile_in_job_scissors(struct v3d_job *job,
438                           uint32_t x, uint32_t y, uint32_t w, uint32_t h)
439 {
440    if (job->scissor.disabled || job->scissor.count == 0)
441       return true;
442 
443    const uint32_t min_x = x * w;
444    const uint32_t min_y = y * h;
445    const uint32_t max_x = min_x + w - 1;
446    const uint32_t max_y = min_y + h - 1;
447 
448    for (uint32_t i = 0; i < job->scissor.count; i++) {
449            const uint32_t min_s_x = job->scissor.rects[i].min_x;
450            const uint32_t min_s_y = job->scissor.rects[i].min_y;
451            const uint32_t max_s_x = job->scissor.rects[i].max_x;
452            const uint32_t max_s_y = job->scissor.rects[i].max_y;
453 
454            if (max_x < min_s_x || min_x > max_s_x ||
455                max_y < min_s_y || min_y > max_s_y) {
456                    continue;
457            }
458 
459            return true;
460    }
461 
462    return false;
463 }
464 
/* Returns whether the initial tile clear must be emitted twice, which is
 * required in double-buffer mode whenever there is more than one tile.
 */
static inline bool
do_double_initial_tile_clear(const struct v3d_job *job)
{
        /* Our rendering code emits an initial clear per layer, unlike the
         * Vulkan driver, which only executes a single initial clear for all
         * layers. This is because in GL we don't use the
         * 'clear_buffer_being_stored' bit when storing tiles, so each layer
         * needs the initial clear. This is also why this helper, unlike the
         * Vulkan version, doesn't check the layer count to decide if double
         * clear for double buffer mode is required.
         */
        return job->double_buffer &&
               (job->draw_tiles_x > 1 || job->draw_tiles_y > 1);
}
479 
/*
 * Emits the full RCL sequence for one layer: tile list base, supertile
 * configuration, the initial tile-buffer clears (with the GFXH-1742
 * workaround), the generic per-tile list, and finally the supertile
 * coordinates covering the job's draw bounds (filtered by scissors).
 */
static void
emit_render_layer(struct v3d_job *job, uint32_t layer)
{
        uint32_t supertile_w = 1, supertile_h = 1;

        /* If doing multicore binning, we would need to initialize each
         * core's tile list here.
         */
        uint32_t tile_alloc_offset =
                layer * job->draw_tiles_x * job->draw_tiles_y * 64;
        cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
                list.address = cl_address(job->tile_alloc, tile_alloc_offset);
        }

        cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
                uint32_t frame_w_in_supertiles, frame_h_in_supertiles;
                const uint32_t max_supertiles = 256;

                /* Size up our supertiles until we get under the limit. */
                for (;;) {
                        frame_w_in_supertiles = DIV_ROUND_UP(job->draw_tiles_x,
                                                             supertile_w);
                        frame_h_in_supertiles = DIV_ROUND_UP(job->draw_tiles_y,
                                                             supertile_h);
                        if (frame_w_in_supertiles *
                                frame_h_in_supertiles < max_supertiles) {
                                break;
                        }

                        /* Grow the smaller dimension to keep supertiles
                         * close to square.
                         */
                        if (supertile_w < supertile_h)
                                supertile_w++;
                        else
                                supertile_h++;
                }

                config.number_of_bin_tile_lists = 1;
                config.total_frame_width_in_tiles = job->draw_tiles_x;
                config.total_frame_height_in_tiles = job->draw_tiles_y;

                config.supertile_width_in_tiles = supertile_w;
                config.supertile_height_in_tiles = supertile_h;

                config.total_frame_width_in_supertiles = frame_w_in_supertiles;
                config.total_frame_height_in_supertiles = frame_h_in_supertiles;
        }

        /* Start by clearing the tile buffer. */
        cl_emit(&job->rcl, TILE_COORDINATES, coords) {
                coords.tile_column_number = 0;
                coords.tile_row_number = 0;
        }

        /* Emit an initial clear of the tile buffers.  This is necessary
         * for any buffers that should be cleared (since clearing
         * normally happens at the *end* of the generic tile list), but
         * it's also nice to clear everything so the first tile doesn't
         * inherit any contents from some previous frame.
         *
         * Also, implement the GFXH-1742 workaround.  There's a race in
         * the HW between the RCL updating the TLB's internal type/size
         * and the spawning of the QPU instances using the TLB's current
         * internal type/size.  To make sure the QPUs get the right
         * state, we need 1 dummy store in between internal type/size
         * changes on V3D 3.x, and 2 dummy stores on 4.x.
         */
        for (int i = 0; i < 2; i++) {
                if (i > 0)
                        cl_emit(&job->rcl, TILE_COORDINATES, coords);
                cl_emit(&job->rcl, END_OF_LOADS, end);
                cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
                        store.buffer_to_store = NONE;
                }

                if (i == 0 || do_double_initial_tile_clear(job)) {
#if V3D_VERSION < 71
                        cl_emit(&job->rcl, CLEAR_TILE_BUFFERS, clear) {
                                clear.clear_z_stencil_buffer = !job->early_zs_clear;
                                clear.clear_all_render_targets = true;
                        }
#else
                        cl_emit(&job->rcl, CLEAR_RENDER_TARGETS, clear);
#endif
                }
                cl_emit(&job->rcl, END_OF_TILE_MARKER, end);
        }
        cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);

        v3d_rcl_emit_generic_per_tile_list(job, layer);

        /* XXX perf: We should expose GL_MESA_tile_raster_order to
         * improve X11 performance, but we should use Morton order
         * otherwise to improve cache locality.
         */
        uint32_t supertile_w_in_pixels = job->tile_width * supertile_w;
        uint32_t supertile_h_in_pixels = job->tile_height * supertile_h;
        uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels;
        uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels;

        uint32_t max_x_supertile = 0;
        uint32_t max_y_supertile = 0;
        if (job->draw_max_x != 0 && job->draw_max_y != 0) {
                /* draw_max_* are exclusive, hence the -1 before dividing. */
                max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels;
                max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels;
        }

        for (int y = min_y_supertile; y <= max_y_supertile; y++) {
                for (int x = min_x_supertile; x <= max_x_supertile; x++) {
                        if (supertile_in_job_scissors(job, x, y,
                                                      supertile_w_in_pixels,
                                                      supertile_h_in_pixels)) {
                                cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) {
                                      coords.column_number_in_supertiles = x;
                                      coords.row_number_in_supertiles = y;
                                }
                        }
                }
        }
}
598 
599 void
v3dX(emit_rcl)600 v3dX(emit_rcl)(struct v3d_job *job)
601 {
602         /* The RCL list should be empty. */
603         assert(!job->rcl.bo);
604 
605         v3d_cl_ensure_space_with_branch(&job->rcl, 200 +
606                                         MAX2(job->num_layers, 1) * 256 *
607                                         cl_packet_length(SUPERTILE_COORDINATES));
608         job->submit.rcl_start = job->rcl.bo->offset;
609         v3d_job_add_bo(job, job->rcl.bo);
610 
611         /* Common config must be the first TILE_RENDERING_MODE_CFG
612          * and Z_STENCIL_CLEAR_VALUES must be last.  The ones in between are
613          * optional updates to the previous HW state.
614          */
615         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
616                 if (job->zsbuf) {
617                         struct v3d_surface *surf = v3d_surface(job->zsbuf);
618                         config.internal_depth_type = surf->internal_type;
619                 }
620 
621                 if (job->decided_global_ez_enable) {
622                         switch (job->first_ez_state) {
623                         case V3D_EZ_UNDECIDED:
624                         case V3D_EZ_LT_LE:
625                                 config.early_z_disable = false;
626                                 config.early_z_test_and_update_direction =
627                                         EARLY_Z_DIRECTION_LT_LE;
628                                 break;
629                         case V3D_EZ_GT_GE:
630                                 config.early_z_disable = false;
631                                 config.early_z_test_and_update_direction =
632                                         EARLY_Z_DIRECTION_GT_GE;
633                                 break;
634                         case V3D_EZ_DISABLED:
635                                 config.early_z_disable = true;
636                         }
637                 } else {
638                         assert(job->draw_calls_queued == 0);
639                         config.early_z_disable = true;
640                 }
641 
642                 assert(job->zsbuf || config.early_z_disable);
643 
644                 job->early_zs_clear = (job->clear & PIPE_CLEAR_DEPTHSTENCIL) &&
645                         !(job->load & PIPE_CLEAR_DEPTHSTENCIL) &&
646                         !(job->store & PIPE_CLEAR_DEPTHSTENCIL);
647 
648                 config.early_depth_stencil_clear = job->early_zs_clear;
649 
650                 config.image_width_pixels = job->draw_width;
651                 config.image_height_pixels = job->draw_height;
652 
653                 config.number_of_render_targets = MAX2(job->nr_cbufs, 1);
654 
655                 assert(!job->msaa || !job->double_buffer);
656                 config.multisample_mode_4x = job->msaa;
657                 config.double_buffer_in_non_ms_mode = job->double_buffer;
658 
659 #if V3D_VERSION == 42
660                 config.maximum_bpp_of_all_render_targets = job->internal_bpp;
661 #endif
662 #if V3D_VERSION >= 71
663                 config.log2_tile_width = log2_tile_size(job->tile_width);
664                 config.log2_tile_height = log2_tile_size(job->tile_height);
665 
666                 /* FIXME: ideallly we would like next assert on the packet header (as is
667                  * general, so also applies to GL). We would need to expand
668                  * gen_pack_header for that.
669                  */
670                 assert(config.log2_tile_width == config.log2_tile_height ||
671                        config.log2_tile_width == config.log2_tile_height + 1);
672 #endif
673 
674         }
675 
676 #if V3D_VERSION >= 71
677         uint32_t base_addr = 0;
678 
679         /* If we don't have any color RTs, we sill need to emit one and flag
680          * it as not used using stride = 1
681          */
682         if (job->nr_cbufs == 0) {
683            cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
684               rt.stride = 1; /* Unused */
685            }
686         }
687 #endif
688         for (int i = 0; i < job->nr_cbufs; i++) {
689                 struct pipe_surface *psurf = job->cbufs[i];
690                 if (!psurf) {
691 #if V3D_VERSION >= 71
692                         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
693                                 rt.render_target_number = i;
694                                 rt.stride = 1; /* Unused */
695                         }
696 #endif
697                         continue;
698                 }
699 
700                 struct v3d_surface *surf = v3d_surface(psurf);
701                 struct v3d_resource *rsc = v3d_resource(psurf->texture);
702 
703                 UNUSED uint32_t config_pad = 0;
704                 UNUSED uint32_t clear_pad = 0;
705 
706                 /* XXX: Set the pad for raster. */
707                 if (surf->tiling == V3D_TILING_UIF_NO_XOR ||
708                     surf->tiling == V3D_TILING_UIF_XOR) {
709                         int uif_block_height = v3d_utile_height(rsc->cpp) * 2;
710                         uint32_t implicit_padded_height = (align(job->draw_height, uif_block_height) /
711                                                            uif_block_height);
712                         if (surf->padded_height_of_output_image_in_uif_blocks -
713                             implicit_padded_height < 15) {
714                                 config_pad = (surf->padded_height_of_output_image_in_uif_blocks -
715                                               implicit_padded_height);
716                         } else {
717                                 config_pad = 15;
718                                 clear_pad = surf->padded_height_of_output_image_in_uif_blocks;
719                         }
720                 }
721 
722 #if V3D_VERSION == 42
723                 cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1,
724                         clear) {
725                         clear.clear_color_low_32_bits = job->clear_color[i][0];
726                         clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff;
727                         clear.render_target_number = i;
728                 };
729 
730                 if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) {
731                         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2,
732                                 clear) {
733                                 clear.clear_color_mid_low_32_bits =
734                                         ((job->clear_color[i][1] >> 24) |
735                                          (job->clear_color[i][2] << 8));
736                                 clear.clear_color_mid_high_24_bits =
737                                         ((job->clear_color[i][2] >> 24) |
738                                          ((job->clear_color[i][3] & 0xffff) << 8));
739                                 clear.render_target_number = i;
740                         };
741                 }
742 
743                 if (surf->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
744                         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3,
745                                 clear) {
746                                 clear.uif_padded_height_in_uif_blocks = clear_pad;
747                                 clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16;
748                                 clear.render_target_number = i;
749                         };
750                 }
751 #endif
752 #if V3D_VERSION >= 71
753                 cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
754                         rt.clear_color_low_bits = job->clear_color[i][0];
755                         v3d_setup_render_target(job, i, &rt.internal_bpp,
756                                                 &rt.internal_type_and_clamping);
757                         rt.stride =
758                                 v3d_compute_rt_row_row_stride_128_bits(job->tile_width,
759                                                                        v3d_internal_bpp_words(rt.internal_bpp));
760                         rt.base_address = base_addr;
761                         rt.render_target_number = i;
762 
763                         base_addr += (job->tile_height * rt.stride) / 8;
764                 }
765 
766                 if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) {
767                         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART2, rt) {
768                                 rt.clear_color_mid_bits = /* 40 bits (32 + 8)  */
769                                         ((uint64_t) job->clear_color[i][1]) |
770                                         (((uint64_t) (job->clear_color[i][2] & 0xff)) << 32);
771                                 rt.render_target_number = i;
772                         }
773                 }
774 
775                 if (surf->internal_bpp >= V3D_INTERNAL_BPP_128) {
776                         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART3, rt) {
777                                 rt.clear_color_top_bits = /* 56 bits (24 + 32) */
778                                         (((uint64_t) (job->clear_color[i][2] & 0xffffff00)) >> 8) |
779                                         (((uint64_t) (job->clear_color[i][3])) << 24);
780                                 rt.render_target_number = i;
781                         }
782                 }
783 #endif
784         }
785 
786 #if V3D_VERSION == 42
787         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
788                 v3d_setup_render_target(job, 0,
789                                         &rt.render_target_0_internal_bpp,
790                                         &rt.render_target_0_internal_type,
791                                         &rt.render_target_0_clamp);
792                 v3d_setup_render_target(job, 1,
793                                         &rt.render_target_1_internal_bpp,
794                                         &rt.render_target_1_internal_type,
795                                         &rt.render_target_1_clamp);
796                 v3d_setup_render_target(job, 2,
797                                         &rt.render_target_2_internal_bpp,
798                                         &rt.render_target_2_internal_type,
799                                         &rt.render_target_2_clamp);
800                 v3d_setup_render_target(job, 3,
801                                         &rt.render_target_3_internal_bpp,
802                                         &rt.render_target_3_internal_type,
803                                         &rt.render_target_3_clamp);
804         }
805 #endif
806 
807         /* Ends rendering mode config. */
808         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES,
809                 clear) {
810                 clear.z_clear_value = job->clear_z;
811                 clear.stencil_clear_value = job->clear_s;
812         };
813 
814         /* Always set initial block size before the first branch, which needs
815          * to match the value from binning mode config.
816          */
817         cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
818                 init.use_auto_chained_tile_lists = true;
819                 init.size_of_first_block_in_chained_tile_lists =
820                         TILE_ALLOCATION_BLOCK_SIZE_64B;
821         }
822 
823         /* ARB_framebuffer_no_attachments allows rendering to happen even when
824          * the framebuffer has no attachments, the idea being that fragment
825          * shaders can still do image load/store, ssbo, etc without having to
826          * write to actual attachments, so always run at least one iteration
827          * of the loop.
828          */
829         assert(job->num_layers > 0 || (job->load == 0 && job->store == 0));
830         for (int layer = 0; layer < MAX2(1, job->num_layers); layer++)
831                 emit_render_layer(job, layer);
832 
833         cl_emit(&job->rcl, END_OF_RENDERING, end);
834 }
835