/*
 * Copyright © 2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/format/u_format.h"
#include "util/macros.h"
#include "v3d_context.h"
#include "broadcom/common/v3d_macros.h"
#include "broadcom/common/v3d_tiling.h"
#include "broadcom/common/v3d_util.h"
#include "broadcom/cle/v3dx_pack.h"

#define PIPE_CLEAR_COLOR_BUFFERS (PIPE_CLEAR_COLOR0 | \
                                  PIPE_CLEAR_COLOR1 | \
                                  PIPE_CLEAR_COLOR2 | \
                                  PIPE_CLEAR_COLOR3)

#define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1)

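/* Emits a LOAD_TILE_BUFFER_GENERAL packet to load one buffer (color RT, Z,
 * stencil, or packed Z/S) of the given layer into the TLB, and clears its
 * bit in *loads_pending.  When loading the stencil aspect of a
 * separate-stencil resource, the load is redirected to the separate stencil
 * surface with an S8 image format.
 */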
static void
load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer,
             int layer, uint32_t pipe_bit, uint32_t *loads_pending)
{
        struct v3d_surface *surf = v3d_surface(psurf);
        bool separate_stencil = surf->separate_stencil && buffer == STENCIL;
        if (separate_stencil) {
                psurf = surf->separate_stencil;
                surf = v3d_surface(psurf);
        }

        struct v3d_resource *rsc = v3d_resource(psurf->texture);

        uint32_t layer_offset =
                v3d_layer_offset(&rsc->base, psurf->u.tex.level,
                                 psurf->u.tex.first_layer + layer);
        cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
                load.buffer_to_load = buffer;
                load.address = cl_address(rsc->bo, layer_offset);

                load.memory_format = surf->tiling;
                if (separate_stencil)
                        load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
                else
                        load.input_image_format = surf->format;
                load.r_b_swap = surf->swap_rb;
                load.force_alpha_1 = util_format_has_alpha1(psurf->format);
                if (surf->tiling == V3D_TILING_UIF_NO_XOR ||
                    surf->tiling == V3D_TILING_UIF_XOR) {
                        load.height_in_ub_or_stride =
                                surf->padded_height_of_output_image_in_uif_blocks;
                } else if (surf->tiling == V3D_TILING_RASTER) {
                        struct v3d_resource_slice *slice =
                                &rsc->slices[psurf->u.tex.level];
                        load.height_in_ub_or_stride = slice->stride;
                }

                if (psurf->texture->nr_samples > 1)
                        load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
                else
                        load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
        }

        *loads_pending &= ~pipe_bit;
}

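/* Emits a STORE_TILE_BUFFER_GENERAL packet for one buffer of the given
 * layer, marking the resource as written and clearing the buffer's bit in
 * *stores_pending.  With resolve_4x set (blit jobs only), a 4x MSAA source
 * is resolved down to a single sample on the way out of the TLB.
 */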
static void
store_general(struct v3d_job *job,
              struct v3d_cl *cl, struct pipe_surface *psurf,
              int layer, int buffer, int pipe_bit,
              uint32_t *stores_pending, bool general_color_clear,
              bool resolve_4x)
{
        struct v3d_surface *surf = v3d_surface(psurf);
        bool separate_stencil = surf->separate_stencil && buffer == STENCIL;
        if (separate_stencil) {
                psurf = surf->separate_stencil;
                surf = v3d_surface(psurf);
        }

        *stores_pending &= ~pipe_bit;

        struct v3d_resource *rsc = v3d_resource(psurf->texture);

        rsc->writes++;
        rsc->graphics_written = true;

        uint32_t layer_offset =
                v3d_layer_offset(&rsc->base, psurf->u.tex.level,
                                 psurf->u.tex.first_layer + layer);
        cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
                store.buffer_to_store = buffer;
                store.address = cl_address(rsc->bo, layer_offset);

                store.clear_buffer_being_stored = false;

                if (separate_stencil)
                        store.output_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
                else
                        store.output_image_format = surf->format;

                store.r_b_swap = surf->swap_rb;
                store.memory_format = surf->tiling;

                if (surf->tiling == V3D_TILING_UIF_NO_XOR ||
                    surf->tiling == V3D_TILING_UIF_XOR) {
                        store.height_in_ub_or_stride =
                                surf->padded_height_of_output_image_in_uif_blocks;
                } else if (surf->tiling == V3D_TILING_RASTER) {
                        struct v3d_resource_slice *slice =
                                &rsc->slices[psurf->u.tex.level];
                        store.height_in_ub_or_stride = slice->stride;
                }

                assert(!resolve_4x || job->bbuf);
                if (psurf->texture->nr_samples > 1)
                        store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
                else if (resolve_4x && job->bbuf->texture->nr_samples > 1)
                        store.decimate_mode = V3D_DECIMATE_MODE_4X;
                else
                        store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
        }
}

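/* Maps the PIPE_CLEAR_DEPTH/STENCIL bits to the hardware's Z / STENCIL /
 * ZSTENCIL buffer enum used by the general load/store packets.
 */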
static int
zs_buffer_from_pipe_bits(int pipe_clear_bits)
{
        switch (pipe_clear_bits & PIPE_CLEAR_DEPTHSTENCIL) {
        case PIPE_CLEAR_DEPTHSTENCIL:
                return ZSTENCIL;
        case PIPE_CLEAR_DEPTH:
                return Z;
        case PIPE_CLEAR_STENCIL:
                return STENCIL;
        default:
                return NONE;
        }
}

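/* Emits the loads at the start of the per-tile list: each color buffer and
 * Z/S aspect whose previous contents are needed, followed by the
 * END_OF_LOADS marker that switches the tile from loading to rendering.
 */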
static void
v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer)
{
        /* When blitting, no color or zs buffer is loaded; instead the blit
         * source buffer is loaded for the aspects that we are going to blit.
         */
        assert(!job->bbuf || job->load == 0);
        assert(!job->bbuf || job->nr_cbufs <= 1);

        uint32_t loads_pending = job->bbuf ? job->store : job->load;

        for (int i = 0; i < job->nr_cbufs; i++) {
                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
                if (!(loads_pending & bit))
                        continue;

                struct pipe_surface *psurf = job->bbuf ? job->bbuf : job->cbufs[i];
                assert(!job->bbuf || i == 0);

                if (!psurf)
                        continue;

                load_general(cl, psurf, RENDER_TARGET_0 + i, layer,
                             bit, &loads_pending);
        }

        if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) {
                assert(!job->early_zs_clear);
                struct pipe_surface *src = job->bbuf ? job->bbuf : job->zsbuf;
                struct v3d_resource *rsc = v3d_resource(src->texture);

                if (rsc->separate_stencil &&
                    (loads_pending & PIPE_CLEAR_STENCIL)) {
                        load_general(cl, src,
                                     STENCIL, layer,
                                     PIPE_CLEAR_STENCIL,
                                     &loads_pending);
                }

                if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) {
                        load_general(cl, src,
                                     zs_buffer_from_pipe_bits(loads_pending),
                                     layer,
                                     loads_pending & PIPE_CLEAR_DEPTHSTENCIL,
                                     &loads_pending);
                }
        }

        assert(!loads_pending);
        cl_emit(cl, END_OF_LOADS, end);
}

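/* Emits the stores at the end of the per-tile list: a general store for
 * every buffer in job->store, plus a trailing tile-buffer clear packet for
 * any clears that could not be folded into the stores themselves.
 */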
static void
v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
{
        bool general_color_clear = false;
        uint32_t stores_pending = job->store;

        /* For V3D 4.1, use general stores for all TLB stores.
         *
         * For V3D 3.3, we only use general stores to do raw stores for any
         * MSAA surfaces.  These output UIF tiled images where each 4x MSAA
         * pixel is a 2x2 quad, and the format will be that of the
         * internal_type/internal_bpp, rather than the format from GL's
         * perspective.  Non-MSAA surfaces will use
         * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED.
         */
        assert(!job->bbuf || job->nr_cbufs <= 1);
        for (int i = 0; i < job->nr_cbufs; i++) {
                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
                if (!(job->store & bit))
                        continue;

                struct pipe_surface *psurf = job->cbufs[i];
                if (!psurf)
                        continue;

                store_general(job, cl, psurf, layer, RENDER_TARGET_0 + i, bit,
                              &stores_pending, general_color_clear, job->bbuf);
        }

        if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf) {
                assert(!job->early_zs_clear);
                struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
                if (rsc->separate_stencil) {
                        if (job->store & PIPE_CLEAR_DEPTH) {
                                store_general(job, cl, job->zsbuf, layer,
                                              Z, PIPE_CLEAR_DEPTH,
                                              &stores_pending,
                                              general_color_clear,
                                              false);
                        }

                        if (job->store & PIPE_CLEAR_STENCIL) {
                                store_general(job, cl, job->zsbuf, layer,
                                              STENCIL, PIPE_CLEAR_STENCIL,
                                              &stores_pending,
                                              general_color_clear,
                                              false);
                        }
                } else {
                        store_general(job, cl, job->zsbuf, layer,
                                      zs_buffer_from_pipe_bits(job->store),
                                      job->store & PIPE_CLEAR_DEPTHSTENCIL,
                                      &stores_pending, general_color_clear,
                                      false);
                }
        }

        /* If we're emitting an RCL with GL_ARB_framebuffer_no_attachments,
         * we still need to emit some sort of store.
         */
        if (!job->store) {
                cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
                        store.buffer_to_store = NONE;
                }
        }

        assert(!stores_pending);

        /* GFXH-1461/GFXH-1689: The per-buffer store command's clear
         * buffer bit is broken for depth/stencil.  In addition, the
         * clear packet's Z/S bit is broken, but the RTs bit ends up
         * clearing Z/S.
         */
        if (job->clear) {
#if V3D_VERSION == 42
                cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
                        clear.clear_z_stencil_buffer = !job->early_zs_clear;
                        clear.clear_all_render_targets = true;
                }
#endif
#if V3D_VERSION >= 71
                cl_emit(cl, CLEAR_RENDER_TARGETS, clear);
#endif
        }
}

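/* Per-tile sequence: tile coordinates, the loads, a branch into the
 * binner-generated tile list for the actual drawing, then the stores and
 * the end-of-tile marker.
 */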
static void
v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
{
        /* Emit the generic list in our indirect state -- the rcl will just
         * have pointers into it.
         */
        struct v3d_cl *cl = &job->indirect;
        v3d_cl_ensure_space(cl, 200, 1);
        struct v3d_cl_reloc tile_list_start = cl_get_address(cl);

        /* V3D 4.x/7.x only requires a single tile coordinates packet, and
         * END_OF_LOADS switches us between loading and rendering.
         */
        cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

        v3d_rcl_emit_loads(job, cl, layer);

        /* The binner starts out writing tiles assuming that the initial mode
         * is triangles, so make sure that's the case.
         */
        cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
                fmt.primitive_type = LIST_TRIANGLES;
        }

        /* The PTB assumes this value is 0, but the HW will not set it. */
        cl_emit(cl, SET_INSTANCEID, set) {
                set.instance_id = 0;
        }

        cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

        v3d_rcl_emit_stores(job, cl, layer);

        cl_emit(cl, END_OF_TILE_MARKER, end);

        cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

        cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
                branch.start = tile_list_start;
                branch.end = cl_get_address(cl);
        }
}

/* Note that for v71, the render target cfg packets have just one field that
 * combines the internal type and clamp mode.  For simplicity we keep just
 * one helper.
 *
 * Note: rt_type is in fact an "enum V3DX(Internal_Type)".
 */
static uint32_t
v3dX(clamp_for_format_and_type)(uint32_t rt_type,
                                enum pipe_format format)
{
#if V3D_VERSION == 42
        if (util_format_is_srgb(format)) {
                return V3D_RENDER_TARGET_CLAMP_NORM;
        } else if (util_format_is_pure_integer(format)) {
                return V3D_RENDER_TARGET_CLAMP_INT;
        } else {
                return V3D_RENDER_TARGET_CLAMP_NONE;
        }
#endif
#if V3D_VERSION >= 71
        switch (rt_type) {
        case V3D_INTERNAL_TYPE_8I:
                return V3D_RENDER_TARGET_TYPE_CLAMP_8I_CLAMPED;
        case V3D_INTERNAL_TYPE_8UI:
                return V3D_RENDER_TARGET_TYPE_CLAMP_8UI_CLAMPED;
        case V3D_INTERNAL_TYPE_8:
                return V3D_RENDER_TARGET_TYPE_CLAMP_8;
        case V3D_INTERNAL_TYPE_16I:
                return V3D_RENDER_TARGET_TYPE_CLAMP_16I_CLAMPED;
        case V3D_INTERNAL_TYPE_16UI:
                return V3D_RENDER_TARGET_TYPE_CLAMP_16UI_CLAMPED;
        case V3D_INTERNAL_TYPE_16F:
                return util_format_is_srgb(format) ?
                        V3D_RENDER_TARGET_TYPE_CLAMP_16F_CLAMP_NORM :
                        V3D_RENDER_TARGET_TYPE_CLAMP_16F;
        case V3D_INTERNAL_TYPE_32I:
                return V3D_RENDER_TARGET_TYPE_CLAMP_32I_CLAMPED;
        case V3D_INTERNAL_TYPE_32UI:
                return V3D_RENDER_TARGET_TYPE_CLAMP_32UI_CLAMPED;
        case V3D_INTERNAL_TYPE_32F:
                return V3D_RENDER_TARGET_TYPE_CLAMP_32F;
        default:
                unreachable("Unknown internal render target type");
        }
        return V3D_RENDER_TARGET_TYPE_CLAMP_INVALID;
#endif
        unreachable("Wrong V3D_VERSION");
}

#if V3D_VERSION >= 71
static void
v3d_setup_render_target(struct v3d_job *job,
                        int cbuf,
                        uint32_t *rt_bpp,
                        uint32_t *rt_type_clamp)
{
        if (!job->cbufs[cbuf])
                return;

        struct v3d_surface *surf = v3d_surface(job->cbufs[cbuf]);
        *rt_bpp = surf->internal_bpp;
        if (job->bbuf) {
                struct v3d_surface *bsurf = v3d_surface(job->bbuf);
                *rt_bpp = MAX2(*rt_bpp, bsurf->internal_bpp);
        }
        *rt_type_clamp = v3dX(clamp_for_format_and_type)(surf->internal_type,
                                                         surf->base.format);
}
#endif

#if V3D_VERSION == 42
static void
v3d_setup_render_target(struct v3d_job *job,
                        int cbuf,
                        uint32_t *rt_bpp,
                        uint32_t *rt_type,
                        uint32_t *rt_clamp)
{
        if (!job->cbufs[cbuf])
                return;

        struct v3d_surface *surf = v3d_surface(job->cbufs[cbuf]);
        *rt_bpp = surf->internal_bpp;
        if (job->bbuf) {
                struct v3d_surface *bsurf = v3d_surface(job->bbuf);
                *rt_bpp = MAX2(*rt_bpp, bsurf->internal_bpp);
        }
        *rt_type = surf->internal_type;
        *rt_clamp = v3dX(clamp_for_format_and_type)(surf->internal_type,
                                                    surf->base.format);
}
#endif

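/* Returns whether the supertile at (x, y) (in supertile coordinates, with
 * w/h giving the supertile size in pixels) overlaps any of the job's
 * scissor rects, using a standard axis-aligned box overlap test.
 */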
static bool
supertile_in_job_scissors(struct v3d_job *job,
                          uint32_t x, uint32_t y, uint32_t w, uint32_t h)
{
        if (job->scissor.disabled || job->scissor.count == 0)
                return true;

        const uint32_t min_x = x * w;
        const uint32_t min_y = y * h;
        const uint32_t max_x = min_x + w - 1;
        const uint32_t max_y = min_y + h - 1;

        for (uint32_t i = 0; i < job->scissor.count; i++) {
                const uint32_t min_s_x = job->scissor.rects[i].min_x;
                const uint32_t min_s_y = job->scissor.rects[i].min_y;
                const uint32_t max_s_x = job->scissor.rects[i].max_x;
                const uint32_t max_s_y = job->scissor.rects[i].max_y;

                if (max_x < min_s_x || min_x > max_s_x ||
                    max_y < min_s_y || min_y > max_s_y) {
                        continue;
                }

                return true;
        }

        return false;
}

static inline bool
do_double_initial_tile_clear(const struct v3d_job *job)
{
        /* Our rendering code emits an initial clear per layer, unlike the
         * Vulkan driver, which only executes a single initial clear for all
         * layers.  This is because in GL we don't use the
         * 'clear_buffer_being_stored' bit when storing tiles, so each layer
         * needs the initial clear.  This is also why this helper, unlike the
         * Vulkan version, doesn't check the layer count to decide if double
         * clear for double buffer mode is required.
         */
        return job->double_buffer &&
               (job->draw_tiles_x > 1 || job->draw_tiles_y > 1);
}

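/* Emits the RCL commands for one layer of the framebuffer: the tile list
 * base and supertile configuration, the initial tile-buffer clears (with
 * the GFXH-1742 dummy stores), the generic per-tile list, and finally the
 * coordinates of every supertile to be rendered.
 */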
static void
emit_render_layer(struct v3d_job *job, uint32_t layer)
{
        uint32_t supertile_w = 1, supertile_h = 1;

        /* If doing multicore binning, we would need to initialize each
         * core's tile list here.
         */
        uint32_t tile_alloc_offset =
                layer * job->draw_tiles_x * job->draw_tiles_y * 64;
        cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
                list.address = cl_address(job->tile_alloc, tile_alloc_offset);
        }

        cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
                uint32_t frame_w_in_supertiles, frame_h_in_supertiles;
                const uint32_t max_supertiles = 256;

                /* Size up our supertiles until we get under the limit. */
                for (;;) {
                        frame_w_in_supertiles = DIV_ROUND_UP(job->draw_tiles_x,
                                                             supertile_w);
                        frame_h_in_supertiles = DIV_ROUND_UP(job->draw_tiles_y,
                                                             supertile_h);
                        if (frame_w_in_supertiles *
                            frame_h_in_supertiles < max_supertiles) {
                                break;
                        }

                        if (supertile_w < supertile_h)
                                supertile_w++;
                        else
                                supertile_h++;
                }

                config.number_of_bin_tile_lists = 1;
                config.total_frame_width_in_tiles = job->draw_tiles_x;
                config.total_frame_height_in_tiles = job->draw_tiles_y;

                config.supertile_width_in_tiles = supertile_w;
                config.supertile_height_in_tiles = supertile_h;

                config.total_frame_width_in_supertiles = frame_w_in_supertiles;
                config.total_frame_height_in_supertiles = frame_h_in_supertiles;
        }

        /* Start by clearing the tile buffer. */
        cl_emit(&job->rcl, TILE_COORDINATES, coords) {
                coords.tile_column_number = 0;
                coords.tile_row_number = 0;
        }

        /* Emit an initial clear of the tile buffers.  This is necessary
         * for any buffers that should be cleared (since clearing
         * normally happens at the *end* of the generic tile list), but
         * it's also nice to clear everything so the first tile doesn't
         * inherit any contents from some previous frame.
         *
         * Also, implement the GFXH-1742 workaround.  There's a race in
         * the HW between the RCL updating the TLB's internal type/size
         * and the spawning of the QPU instances using the TLB's current
         * internal type/size.  To make sure the QPUs get the right
         * state, we need 1 dummy store in between internal type/size
         * changes on V3D 3.x, and 2 dummy stores on 4.x.
         */
        for (int i = 0; i < 2; i++) {
                if (i > 0)
                        cl_emit(&job->rcl, TILE_COORDINATES, coords);
                cl_emit(&job->rcl, END_OF_LOADS, end);
                cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
                        store.buffer_to_store = NONE;
                }

                if (i == 0 || do_double_initial_tile_clear(job)) {
#if V3D_VERSION < 71
                        cl_emit(&job->rcl, CLEAR_TILE_BUFFERS, clear) {
                                clear.clear_z_stencil_buffer = !job->early_zs_clear;
                                clear.clear_all_render_targets = true;
                        }
#else
                        cl_emit(&job->rcl, CLEAR_RENDER_TARGETS, clear);
#endif
                }
                cl_emit(&job->rcl, END_OF_TILE_MARKER, end);
        }
        cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);

        v3d_rcl_emit_generic_per_tile_list(job, layer);

        /* XXX perf: We should expose GL_MESA_tile_raster_order to
         * improve X11 performance, but we should use Morton order
         * otherwise to improve cache locality.
         */
        uint32_t supertile_w_in_pixels = job->tile_width * supertile_w;
        uint32_t supertile_h_in_pixels = job->tile_height * supertile_h;
        uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels;
        uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels;

        uint32_t max_x_supertile = 0;
        uint32_t max_y_supertile = 0;
        if (job->draw_max_x != 0 && job->draw_max_y != 0) {
                max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels;
                max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels;
        }

        for (int y = min_y_supertile; y <= max_y_supertile; y++) {
                for (int x = min_x_supertile; x <= max_x_supertile; x++) {
                        if (supertile_in_job_scissors(job, x, y,
                                                      supertile_w_in_pixels,
                                                      supertile_h_in_pixels)) {
                                cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) {
                                        coords.column_number_in_supertiles = x;
                                        coords.row_number_in_supertiles = y;
                                }
                        }
                }
        }
}

void
v3dX(emit_rcl)(struct v3d_job *job)
{
        /* The RCL list should be empty. */
        assert(!job->rcl.bo);

        v3d_cl_ensure_space_with_branch(&job->rcl, 200 +
                                        MAX2(job->num_layers, 1) * 256 *
                                        cl_packet_length(SUPERTILE_COORDINATES));
        job->submit.rcl_start = job->rcl.bo->offset;
        v3d_job_add_bo(job, job->rcl.bo);

        /* Common config must be the first TILE_RENDERING_MODE_CFG
         * and Z_STENCIL_CLEAR_VALUES must be last.  The ones in between are
         * optional updates to the previous HW state.
         */
        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
                if (job->zsbuf) {
                        struct v3d_surface *surf = v3d_surface(job->zsbuf);
                        config.internal_depth_type = surf->internal_type;
                }

                if (job->decided_global_ez_enable) {
                        switch (job->first_ez_state) {
                        case V3D_EZ_UNDECIDED:
                        case V3D_EZ_LT_LE:
                                config.early_z_disable = false;
                                config.early_z_test_and_update_direction =
                                        EARLY_Z_DIRECTION_LT_LE;
                                break;
                        case V3D_EZ_GT_GE:
                                config.early_z_disable = false;
                                config.early_z_test_and_update_direction =
                                        EARLY_Z_DIRECTION_GT_GE;
                                break;
                        case V3D_EZ_DISABLED:
                                config.early_z_disable = true;
                        }
                } else {
                        assert(job->draw_calls_queued == 0);
                        config.early_z_disable = true;
                }

                assert(job->zsbuf || config.early_z_disable);

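                /* An early Z/S clear is possible when the job clears Z/S but
                 * never loads or stores it: the clear then happens when the
                 * tile state is initialized, and the per-tile list can skip
                 * the Z/S loads, stores, and end-of-tile clears entirely.
                 */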
                job->early_zs_clear = (job->clear & PIPE_CLEAR_DEPTHSTENCIL) &&
                        !(job->load & PIPE_CLEAR_DEPTHSTENCIL) &&
                        !(job->store & PIPE_CLEAR_DEPTHSTENCIL);

                config.early_depth_stencil_clear = job->early_zs_clear;

                config.image_width_pixels = job->draw_width;
                config.image_height_pixels = job->draw_height;

                config.number_of_render_targets = MAX2(job->nr_cbufs, 1);

                assert(!job->msaa || !job->double_buffer);
                config.multisample_mode_4x = job->msaa;
                config.double_buffer_in_non_ms_mode = job->double_buffer;

#if V3D_VERSION == 42
                config.maximum_bpp_of_all_render_targets = job->internal_bpp;
#endif
#if V3D_VERSION >= 71
                config.log2_tile_width = log2_tile_size(job->tile_width);
                config.log2_tile_height = log2_tile_size(job->tile_height);

                /* FIXME: ideally we would like this assert to be on the
                 * packet header (as it is generic, it also applies to GL).
                 * We would need to expand gen_pack_header for that.
                 */
                assert(config.log2_tile_width == config.log2_tile_height ||
                       config.log2_tile_width == config.log2_tile_height + 1);
#endif
        }

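        /* On V3D 7.1 each render target is given an explicit base address
         * within the tile buffer; base_addr below advances by each RT's
         * per-tile size (tile_height * stride, in the packet's address
         * units).
         */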
#if V3D_VERSION >= 71
        uint32_t base_addr = 0;

        /* If we don't have any color RTs, we still need to emit one and flag
         * it as unused by setting stride = 1.
         */
        if (job->nr_cbufs == 0) {
                cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
                        rt.stride = 1; /* Unused */
                }
        }
#endif
        for (int i = 0; i < job->nr_cbufs; i++) {
                struct pipe_surface *psurf = job->cbufs[i];
                if (!psurf) {
#if V3D_VERSION >= 71
                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
                                rt.render_target_number = i;
                                rt.stride = 1; /* Unused */
                        }
#endif
                        continue;
                }

                struct v3d_surface *surf = v3d_surface(psurf);
                struct v3d_resource *rsc = v3d_resource(psurf->texture);

                UNUSED uint32_t config_pad = 0;
                UNUSED uint32_t clear_pad = 0;

                /* XXX: Set the pad for raster. */
                if (surf->tiling == V3D_TILING_UIF_NO_XOR ||
                    surf->tiling == V3D_TILING_UIF_XOR) {
                        int uif_block_height = v3d_utile_height(rsc->cpp) * 2;
                        uint32_t implicit_padded_height =
                                align(job->draw_height, uif_block_height) /
                                uif_block_height;
                        if (surf->padded_height_of_output_image_in_uif_blocks -
                            implicit_padded_height < 15) {
                                config_pad = (surf->padded_height_of_output_image_in_uif_blocks -
                                              implicit_padded_height);
                        } else {
                                config_pad = 15;
                                clear_pad = surf->padded_height_of_output_image_in_uif_blocks;
                        }
                }

#if V3D_VERSION == 42
                cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1,
                        clear) {
                        clear.clear_color_low_32_bits = job->clear_color[i][0];
                        clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff;
                        clear.render_target_number = i;
                };

                if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) {
                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2,
                                clear) {
                                clear.clear_color_mid_low_32_bits =
                                        ((job->clear_color[i][1] >> 24) |
                                         (job->clear_color[i][2] << 8));
                                clear.clear_color_mid_high_24_bits =
                                        ((job->clear_color[i][2] >> 24) |
                                         ((job->clear_color[i][3] & 0xffff) << 8));
                                clear.render_target_number = i;
                        };
                }

                if (surf->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3,
                                clear) {
                                clear.uif_padded_height_in_uif_blocks = clear_pad;
                                clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16;
                                clear.render_target_number = i;
                        };
                }
#endif
#if V3D_VERSION >= 71
                cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
                        rt.clear_color_low_bits = job->clear_color[i][0];
                        v3d_setup_render_target(job, i, &rt.internal_bpp,
                                                &rt.internal_type_and_clamping);
                        rt.stride =
                                v3d_compute_rt_row_row_stride_128_bits(job->tile_width,
                                                                       v3d_internal_bpp_words(rt.internal_bpp));
                        rt.base_address = base_addr;
                        rt.render_target_number = i;

                        base_addr += (job->tile_height * rt.stride) / 8;
                }

                if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) {
                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART2, rt) {
                                rt.clear_color_mid_bits = /* 40 bits (32 + 8) */
                                        ((uint64_t)job->clear_color[i][1]) |
                                        (((uint64_t)(job->clear_color[i][2] & 0xff)) << 32);
                                rt.render_target_number = i;
                        }
                }

                if (surf->internal_bpp >= V3D_INTERNAL_BPP_128) {
                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART3, rt) {
                                rt.clear_color_top_bits = /* 56 bits (24 + 32) */
                                        (((uint64_t)(job->clear_color[i][2] & 0xffffff00)) >> 8) |
                                        (((uint64_t)(job->clear_color[i][3])) << 24);
                                rt.render_target_number = i;
                        }
                }
#endif
        }

#if V3D_VERSION == 42
        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
                v3d_setup_render_target(job, 0,
                                        &rt.render_target_0_internal_bpp,
                                        &rt.render_target_0_internal_type,
                                        &rt.render_target_0_clamp);
                v3d_setup_render_target(job, 1,
                                        &rt.render_target_1_internal_bpp,
                                        &rt.render_target_1_internal_type,
                                        &rt.render_target_1_clamp);
                v3d_setup_render_target(job, 2,
                                        &rt.render_target_2_internal_bpp,
                                        &rt.render_target_2_internal_type,
                                        &rt.render_target_2_clamp);
                v3d_setup_render_target(job, 3,
                                        &rt.render_target_3_internal_bpp,
                                        &rt.render_target_3_internal_type,
                                        &rt.render_target_3_clamp);
        }
#endif

        /* Ends rendering mode config. */
        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES,
                clear) {
                clear.z_clear_value = job->clear_z;
                clear.stencil_clear_value = job->clear_s;
        };

        /* Always set initial block size before the first branch, which needs
         * to match the value from binning mode config.
         */
        cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
                init.use_auto_chained_tile_lists = true;
                init.size_of_first_block_in_chained_tile_lists =
                        TILE_ALLOCATION_BLOCK_SIZE_64B;
        }

        /* ARB_framebuffer_no_attachments allows rendering to happen even when
         * the framebuffer has no attachments, the idea being that fragment
         * shaders can still do image load/store, ssbo, etc. without having to
         * write to actual attachments, so always run at least one iteration
         * of the loop.
         */
        assert(job->num_layers > 0 || (job->load == 0 && job->store == 0));
        for (int layer = 0; layer < MAX2(1, job->num_layers); layer++)
                emit_render_layer(job, layer);

        cl_emit(&job->rcl, END_OF_RENDERING, end);
}