• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <robclark@freedesktop.org>
25  */
26 
27 #include "pipe/p_state.h"
28 #include "util/u_memory.h"
29 #include "util/u_prim.h"
30 #include "util/u_string.h"
31 
32 #include "freedreno_resource.h"
33 #include "freedreno_state.h"
34 
35 #include "fd2_context.h"
36 #include "fd2_draw.h"
37 #include "fd2_emit.h"
38 #include "fd2_program.h"
39 #include "fd2_util.h"
40 #include "fd2_zsa.h"
41 
42 static inline uint32_t
pack_rgba(enum pipe_format format,const float * rgba)43 pack_rgba(enum pipe_format format, const float *rgba)
44 {
45    union util_color uc;
46    util_pack_color(rgba, format, &uc);
47    return uc.ui[0];
48 }
49 
50 static void
emit_cacheflush(struct fd_ringbuffer * ring)51 emit_cacheflush(struct fd_ringbuffer *ring)
52 {
53    unsigned i;
54 
55    for (i = 0; i < 12; i++) {
56       OUT_PKT3(ring, CP_EVENT_WRITE, 1);
57       OUT_RING(ring, CACHE_FLUSH);
58    }
59 }
60 
61 static void
emit_vertexbufs(struct fd_context * ctx)62 emit_vertexbufs(struct fd_context *ctx) assert_dt
63 {
64    struct fd_vertex_stateobj *vtx = ctx->vtx.vtx;
65    struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vtx.vertexbuf;
66    struct fd2_vertex_buf bufs[PIPE_MAX_ATTRIBS];
67    unsigned i;
68 
69    if (!vtx->num_elements)
70       return;
71 
72    for (i = 0; i < vtx->num_elements; i++) {
73       struct pipe_vertex_element *elem = &vtx->pipe[i];
74       struct pipe_vertex_buffer *vb = &vertexbuf->vb[elem->vertex_buffer_index];
75       bufs[i].offset = vb->buffer_offset;
76       bufs[i].size = fd_bo_size(fd_resource(vb->buffer.resource)->bo);
77       bufs[i].prsc = vb->buffer.resource;
78    }
79 
80    // NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the
81    // CONST(20,0) (or CONST(26,0) in soliv_vp)
82 
83    fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements);
84    fd2_emit_vertex_bufs(ctx->batch->binning, 0x78, bufs, vtx->num_elements);
85 }
86 
87 static void
draw_impl(struct fd_context * ctx,const struct pipe_draw_info * info,const struct pipe_draw_start_count_bias * draw,struct fd_ringbuffer * ring,unsigned index_offset,bool binning)88 draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
89           const struct pipe_draw_start_count_bias *draw, struct fd_ringbuffer *ring,
90           unsigned index_offset, bool binning) assert_dt
91 {
92    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
93    OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
94    OUT_RING(ring, info->index_size ? 0 : draw->start);
95 
96    OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
97    OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
98 
99    if (is_a20x(ctx->screen)) {
100       /* wait for DMA to finish and
101        * dummy draw one triangle with indexes 0,0,0.
102        * with PRE_FETCH_CULL_ENABLE | GRP_CULL_ENABLE.
103        *
104        * this workaround is for a HW bug related to DMA alignment:
105        * it is necessary for indexed draws and possibly also
106        * draws that read binning data
107        */
108       OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
109       OUT_RING(ring, 0x000005d0); /* RBBM_STATUS */
110       OUT_RING(ring, 0x00000000);
111       OUT_RING(ring, 0x00001000); /* bit: 12: VGT_BUSY_NO_DMA */
112       OUT_RING(ring, 0x00000001);
113 
114       OUT_PKT3(ring, CP_DRAW_INDX_BIN, 6);
115       OUT_RING(ring, 0x00000000);
116       OUT_RING(ring, 0x0003c004);
117       OUT_RING(ring, 0x00000000);
118       OUT_RING(ring, 0x00000003);
119       OUT_RELOC(ring, fd_resource(fd2_context(ctx)->solid_vertexbuf)->bo, 64, 0,
120                 0);
121       OUT_RING(ring, 0x00000006);
122    } else {
123       OUT_WFI(ring);
124 
125       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
126       OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
127       OUT_RING(ring, info->index_bounds_valid ? info->max_index
128                                               : ~0); /* VGT_MAX_VTX_INDX */
129       OUT_RING(ring, info->index_bounds_valid ? info->min_index
130                                               : 0); /* VGT_MIN_VTX_INDX */
131    }
132 
133    /* binning shader will take offset from C64 */
134    if (binning && is_a20x(ctx->screen)) {
135       OUT_PKT3(ring, CP_SET_CONSTANT, 5);
136       OUT_RING(ring, 0x00000180);
137       OUT_RING(ring, fui(ctx->batch->num_vertices));
138       OUT_RING(ring, fui(0.0f));
139       OUT_RING(ring, fui(0.0f));
140       OUT_RING(ring, fui(0.0f));
141    }
142 
143    enum pc_di_vis_cull_mode vismode = USE_VISIBILITY;
144    if (binning || info->mode == MESA_PRIM_POINTS)
145       vismode = IGNORE_VISIBILITY;
146 
147    fd_draw_emit(ctx->batch, ring, ctx->screen->primtypes[info->mode],
148                 vismode, info, draw, index_offset);
149 
150    if (is_a20x(ctx->screen)) {
151       /* not sure why this is required, but it fixes some hangs */
152       OUT_WFI(ring);
153    } else {
154       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
155       OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
156       OUT_RING(ring, 0x00000000);
157    }
158 
159    emit_cacheflush(ring);
160 }
161 
162 static bool
fd2_draw_vbo(struct fd_context * ctx,const struct pipe_draw_info * pinfo,unsigned drawid_offset,const struct pipe_draw_indirect_info * indirect,const struct pipe_draw_start_count_bias * pdraw,unsigned index_offset)163 fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,
164              unsigned drawid_offset,
165              const struct pipe_draw_indirect_info *indirect,
166              const struct pipe_draw_start_count_bias *pdraw,
167              unsigned index_offset) assert_dt
168 {
169    if (!ctx->prog.fs || !ctx->prog.vs)
170       return false;
171 
172    if (pinfo->mode != MESA_PRIM_COUNT && !indirect && !pinfo->primitive_restart &&
173        !u_trim_pipe_prim(pinfo->mode, (unsigned *)&pdraw->count))
174       return false;
175 
176    if (ctx->dirty & FD_DIRTY_VTXBUF)
177       emit_vertexbufs(ctx);
178 
179    fd_blend_tracking(ctx);
180 
181    if (fd_binning_enabled)
182       fd2_emit_state_binning(ctx, ctx->dirty);
183 
184    fd2_emit_state(ctx, ctx->dirty);
185 
186    /* a2xx can draw only 65535 vertices at once
187     * on a22x the field in the draw command is 32bits but seems limited too
188     * using a limit of 32k because it fixes an unexplained hang
189     * 32766 works for all primitives (multiple of 2 and 3)
190     */
191    if (pdraw->count > 32766) {
192       /* clang-format off */
193       static const uint16_t step_tbl[MESA_PRIM_COUNT] = {
194          [0 ... MESA_PRIM_COUNT - 1]  = 32766,
195          [MESA_PRIM_LINE_STRIP]     = 32765,
196          [MESA_PRIM_TRIANGLE_STRIP] = 32764,
197 
198          /* needs more work */
199          [MESA_PRIM_TRIANGLE_FAN]   = 0,
200          [MESA_PRIM_LINE_LOOP]      = 0,
201       };
202       /* clang-format on */
203 
204       struct pipe_draw_start_count_bias draw = *pdraw;
205       unsigned count = draw.count;
206       unsigned step = step_tbl[pinfo->mode];
207       unsigned num_vertices = ctx->batch->num_vertices;
208 
209       if (!step)
210          return false;
211 
212       for (; count + step > 32766; count -= step) {
213          draw.count = MIN2(count, 32766);
214          draw_impl(ctx, pinfo, &draw, ctx->batch->draw, index_offset, false);
215          draw_impl(ctx, pinfo, &draw, ctx->batch->binning, index_offset, true);
216          draw.start += step;
217          ctx->batch->num_vertices += step;
218       }
219       /* changing this value is a hack, restore it */
220       ctx->batch->num_vertices = num_vertices;
221    } else {
222       draw_impl(ctx, pinfo, pdraw, ctx->batch->draw, index_offset, false);
223       draw_impl(ctx, pinfo, pdraw, ctx->batch->binning, index_offset, true);
224    }
225 
226    fd_context_all_clean(ctx);
227 
228    ctx->batch->num_vertices += pdraw->count * pinfo->instance_count;
229 
230    return true;
231 }
232 
233 static void
fd2_draw_vbos(struct fd_context * ctx,const struct pipe_draw_info * info,unsigned drawid_offset,const struct pipe_draw_indirect_info * indirect,const struct pipe_draw_start_count_bias * draws,unsigned num_draws,unsigned index_offset)234 fd2_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
235               unsigned drawid_offset,
236               const struct pipe_draw_indirect_info *indirect,
237               const struct pipe_draw_start_count_bias *draws,
238               unsigned num_draws,
239               unsigned index_offset)
240    assert_dt
241 {
242    for (unsigned i = 0; i < num_draws; i++)
243       fd2_draw_vbo(ctx, info, drawid_offset, indirect, &draws[i], index_offset);
244 }
245 
246 static void
clear_state(struct fd_batch * batch,struct fd_ringbuffer * ring,unsigned buffers,bool fast_clear)247 clear_state(struct fd_batch *batch, struct fd_ringbuffer *ring,
248             unsigned buffers, bool fast_clear) assert_dt
249 {
250    struct fd_context *ctx = batch->ctx;
251    struct fd2_context *fd2_ctx = fd2_context(ctx);
252    uint32_t reg;
253 
254    fd2_emit_vertex_bufs(ring, 0x9c,
255                         (struct fd2_vertex_buf[]){
256                            {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
257                         },
258                         1);
259 
260    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
261    OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
262    OUT_RING(ring, 0);
263 
264    fd2_program_emit(ctx, ring, &ctx->solid_prog);
265 
266    OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
267    OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
268 
269    if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
270       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
271       OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
272       reg = 0;
273       if (buffers & PIPE_CLEAR_DEPTH) {
274          reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) |
275                 A2XX_RB_DEPTHCONTROL_Z_ENABLE |
276                 A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE |
277                 A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
278       }
279       if (buffers & PIPE_CLEAR_STENCIL) {
280          reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) |
281                 A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
282                 A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);
283       }
284       OUT_RING(ring, reg);
285    }
286 
287    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
288    OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
289    OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
290                      A2XX_RB_COLORCONTROL_BLEND_DISABLE |
291                      A2XX_RB_COLORCONTROL_ROP_CODE(12) |
292                      A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
293                      A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
294 
295    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
296    OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
297    OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */
298    OUT_RING(
299       ring,
300       A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
301          A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
302          A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES) |
303          (fast_clear ? A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE : 0));
304 
305    if (fast_clear) {
306       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
307       OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
308       OUT_RING(ring, A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(3));
309    }
310 
311    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
312    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
313    OUT_RING(ring, 0x0000ffff);
314 
315    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
316    OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
317    if (buffers & PIPE_CLEAR_COLOR) {
318       OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |
319                         A2XX_RB_COLOR_MASK_WRITE_GREEN |
320                         A2XX_RB_COLOR_MASK_WRITE_BLUE |
321                         A2XX_RB_COLOR_MASK_WRITE_ALPHA);
322    } else {
323       OUT_RING(ring, 0x0);
324    }
325 
326    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
327    OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
328    OUT_RING(ring, 0);
329 
330    if (is_a20x(batch->ctx->screen))
331       return;
332 
333    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
334    OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
335    OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
336    OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
337 
338    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
339    OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
340    OUT_RING(ring,
341             0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
342    OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
343 
344    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
345    OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
346    OUT_RING(ring, 0x00000084);
347 
348    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
349    OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
350    OUT_RING(ring, 0x0000028f);
351 }
352 
353 static void
clear_state_restore(struct fd_context * ctx,struct fd_ringbuffer * ring)354 clear_state_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
355 {
356    if (is_a20x(ctx->screen))
357       return;
358 
359    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
360    OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
361    OUT_RING(ring, 0x00000000);
362 
363    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
364    OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
365    OUT_RING(ring, 0x00000000);
366 
367    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
368    OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
369    OUT_RING(ring, 0x0000003b);
370 }
371 
372 static void
clear_fast(struct fd_batch * batch,struct fd_ringbuffer * ring,uint32_t color_clear,uint32_t depth_clear,unsigned patch_type)373 clear_fast(struct fd_batch *batch, struct fd_ringbuffer *ring,
374            uint32_t color_clear, uint32_t depth_clear, unsigned patch_type)
375 {
376    BEGIN_RING(ring, 8); /* preallocate next 2 packets (for patching) */
377 
378    /* zero values are patched in */
379    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
380    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
381    OUT_RINGP(ring, patch_type, &batch->gmem_patches);
382 
383    OUT_PKT3(ring, CP_SET_CONSTANT, 4);
384    OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
385    OUT_RING(ring, 0x8000 | 32);
386    OUT_RING(ring, 0);
387    OUT_RING(ring, 0);
388 
389    /* set fill values */
390    if (!is_a20x(batch->ctx->screen)) {
391       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
392       OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
393       OUT_RING(ring, color_clear);
394 
395       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
396       OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
397       OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
398                         A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf));
399 
400       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
401       OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
402       OUT_RING(ring, depth_clear);
403    } else {
404       const float sc = 1.0f / 255.0f;
405 
406       OUT_PKT3(ring, CP_SET_CONSTANT, 5);
407       OUT_RING(ring, 0x00000480);
408       OUT_RING(ring, fui((float)(color_clear >> 0 & 0xff) * sc));
409       OUT_RING(ring, fui((float)(color_clear >> 8 & 0xff) * sc));
410       OUT_RING(ring, fui((float)(color_clear >> 16 & 0xff) * sc));
411       OUT_RING(ring, fui((float)(color_clear >> 24 & 0xff) * sc));
412 
413       // XXX if using float the rounding error breaks it..
414       float depth = ((double)(depth_clear >> 8)) * (1.0 / (double)0xffffff);
415       assert((unsigned)(((double)depth * (double)0xffffff)) ==
416              (depth_clear >> 8));
417 
418       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
419       OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
420       OUT_RING(ring, fui(0.0f));
421       OUT_RING(ring, fui(depth));
422 
423       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
424       OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
425       OUT_RING(ring,
426                0xff000000 |
427                   A2XX_RB_STENCILREFMASK_BF_STENCILREF(depth_clear & 0xff) |
428                   A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
429       OUT_RING(ring, 0xff000000 |
430                         A2XX_RB_STENCILREFMASK_STENCILREF(depth_clear & 0xff) |
431                         A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
432    }
433 
434    fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
435            DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
436 }
437 
438 static bool
fd2_clear_fast(struct fd_context * ctx,unsigned buffers,const union pipe_color_union * color,double depth,unsigned stencil)439 fd2_clear_fast(struct fd_context *ctx, unsigned buffers,
440                const union pipe_color_union *color, double depth,
441                unsigned stencil) assert_dt
442 {
443    /* using 4x MSAA allows clearing ~2x faster
444     * then we can use higher bpp clearing to clear lower bpp
445     * 1 "pixel" can clear 64 bits (rgba8+depth24+stencil8)
446     * note: its possible to clear with 32_32_32_32 format but its not faster
447     * note: fast clear doesn't work with sysmem rendering
448     * (sysmem rendering is disabled when clear is used)
449     *
450     * we only have 16-bit / 32-bit color formats
451     * and 16-bit / 32-bit depth formats
452     * so there are only a few possible combinations
453     *
454     * if the bpp of the color/depth doesn't match
455     * we clear with depth/color individually
456     */
457    struct fd2_context *fd2_ctx = fd2_context(ctx);
458    struct fd_batch *batch = ctx->batch;
459    struct fd_ringbuffer *ring = batch->draw;
460    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
461    uint32_t color_clear = 0, depth_clear = 0;
462    enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
463    int depth_size = -1; /* -1: no clear, 0: clear 16-bit, 1: clear 32-bit */
464    int color_size = -1;
465 
466    /* TODO: need to test performance on a22x */
467    if (!is_a20x(ctx->screen))
468       return false;
469 
470    if (buffers & PIPE_CLEAR_COLOR)
471       color_size = util_format_get_blocksizebits(format) == 32;
472 
473    if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
474       /* no fast clear when clearing only one component of depth+stencil buffer */
475       if (!(buffers & PIPE_CLEAR_DEPTH))
476          return false;
477 
478       if ((pfb->zsbuf->format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
479            pfb->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) &&
480           !(buffers & PIPE_CLEAR_STENCIL))
481          return false;
482 
483       depth_size = fd_pipe2depth(pfb->zsbuf->format) == DEPTHX_24_8;
484    }
485 
486    assert(color_size >= 0 || depth_size >= 0);
487 
488    if (color_size == 0) {
489       color_clear = pack_rgba(format, color->f);
490       color_clear = (color_clear << 16) | (color_clear & 0xffff);
491    } else if (color_size == 1) {
492       color_clear = pack_rgba(format, color->f);
493    }
494 
495    if (depth_size == 0) {
496       depth_clear = (uint32_t)(0xffff * depth);
497       depth_clear |= depth_clear << 16;
498    } else if (depth_size == 1) {
499       depth_clear = (((uint32_t)(0xffffff * depth)) << 8);
500       depth_clear |= (stencil & 0xff);
501    }
502 
503    /* disable "window" scissor.. */
504    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
505    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
506    OUT_RING(ring, xy2d(0, 0));
507    OUT_RING(ring, xy2d(0x7fff, 0x7fff));
508 
509    /* make sure we fill all "pixels" (in SCREEN_SCISSOR) */
510    OUT_PKT3(ring, CP_SET_CONSTANT, 5);
511    OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
512    OUT_RING(ring, fui(4096.0f));
513    OUT_RING(ring, fui(4096.0f));
514    OUT_RING(ring, fui(4096.0f));
515    OUT_RING(ring, fui(4096.0f));
516 
517    clear_state(batch, ring, ~0u, true);
518 
519    if (color_size >= 0 && depth_size != color_size)
520       clear_fast(batch, ring, color_clear, color_clear,
521                  GMEM_PATCH_FASTCLEAR_COLOR);
522 
523    if (depth_size >= 0 && depth_size != color_size)
524       clear_fast(batch, ring, depth_clear, depth_clear,
525                  GMEM_PATCH_FASTCLEAR_DEPTH);
526 
527    if (depth_size == color_size)
528       clear_fast(batch, ring, color_clear, depth_clear,
529                  GMEM_PATCH_FASTCLEAR_COLOR_DEPTH);
530 
531    clear_state_restore(ctx, ring);
532 
533    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
534    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
535    OUT_RING(ring, 0);
536 
537    /* can't patch in SCREEN_SCISSOR_BR as it can be different for each tile.
538     * MEM_WRITE the value in tile_renderprep, and use CP_LOAD_CONSTANT_CONTEXT
539     * the value is read from byte offset 60 in the given bo
540     */
541    OUT_PKT3(ring, CP_LOAD_CONSTANT_CONTEXT, 3);
542    OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0, 0, 0);
543    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
544    OUT_RING(ring, 1);
545 
546    OUT_PKT3(ring, CP_SET_CONSTANT, 4);
547    OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
548    OUT_RINGP(ring, GMEM_PATCH_RESTORE_INFO, &batch->gmem_patches);
549    OUT_RING(ring, 0);
550    OUT_RING(ring, 0);
551    return true;
552 }
553 
554 static bool
fd2_clear(struct fd_context * ctx,enum fd_buffer_mask buffers,const union pipe_color_union * color,double depth,unsigned stencil)555 fd2_clear(struct fd_context *ctx, enum fd_buffer_mask buffers,
556           const union pipe_color_union *color, double depth,
557           unsigned stencil) assert_dt
558 {
559    struct fd_ringbuffer *ring = ctx->batch->draw;
560    struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer;
561 
562    if (fd2_clear_fast(ctx, buffers, color, depth, stencil))
563       goto dirty;
564 
565    /* set clear value */
566    if (is_a20x(ctx->screen)) {
567       if (buffers & FD_BUFFER_COLOR) {
568          /* C0 used by fragment shader */
569          OUT_PKT3(ring, CP_SET_CONSTANT, 5);
570          OUT_RING(ring, 0x00000480);
571          OUT_RING(ring, color->ui[0]);
572          OUT_RING(ring, color->ui[1]);
573          OUT_RING(ring, color->ui[2]);
574          OUT_RING(ring, color->ui[3]);
575       }
576 
577       if (buffers & FD_BUFFER_DEPTH) {
578          /* use viewport to set depth value */
579          OUT_PKT3(ring, CP_SET_CONSTANT, 3);
580          OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
581          OUT_RING(ring, fui(0.0f));
582          OUT_RING(ring, fui(depth));
583       }
584 
585       if (buffers & FD_BUFFER_STENCIL) {
586          OUT_PKT3(ring, CP_SET_CONSTANT, 3);
587          OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
588          OUT_RING(ring, 0xff000000 |
589                            A2XX_RB_STENCILREFMASK_BF_STENCILREF(stencil) |
590                            A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
591          OUT_RING(ring, 0xff000000 |
592                            A2XX_RB_STENCILREFMASK_STENCILREF(stencil) |
593                            A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
594       }
595    } else {
596       if (buffers & FD_BUFFER_COLOR) {
597          OUT_PKT3(ring, CP_SET_CONSTANT, 2);
598          OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
599          OUT_RING(ring, pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f));
600       }
601 
602       if (buffers & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
603          uint32_t clear_mask, depth_clear;
604          switch (fd_pipe2depth(fb->zsbuf->format)) {
605          case DEPTHX_24_8:
606             clear_mask = ((buffers & FD_BUFFER_DEPTH) ? 0xe : 0) |
607                          ((buffers & FD_BUFFER_STENCIL) ? 0x1 : 0);
608             depth_clear =
609                (((uint32_t)(0xffffff * depth)) << 8) | (stencil & 0xff);
610             break;
611          case DEPTHX_16:
612             clear_mask = 0xf;
613             depth_clear = (uint32_t)(0xffffffff * depth);
614             break;
615          default:
616             unreachable("invalid depth");
617             break;
618          }
619 
620          OUT_PKT3(ring, CP_SET_CONSTANT, 2);
621          OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
622          OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
623                            A2XX_RB_COPY_CONTROL_CLEAR_MASK(clear_mask));
624 
625          OUT_PKT3(ring, CP_SET_CONSTANT, 2);
626          OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
627          OUT_RING(ring, depth_clear);
628       }
629    }
630 
631    /* scissor state */
632    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
633    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
634    OUT_RING(ring, xy2d(0, 0));
635    OUT_RING(ring, xy2d(fb->width, fb->height));
636 
637    /* viewport state */
638    OUT_PKT3(ring, CP_SET_CONSTANT, 5);
639    OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
640    OUT_RING(ring, fui((float)fb->width / 2.0f));
641    OUT_RING(ring, fui((float)fb->width / 2.0f));
642    OUT_RING(ring, fui((float)fb->height / 2.0f));
643    OUT_RING(ring, fui((float)fb->height / 2.0f));
644 
645    /* common state */
646    clear_state(ctx->batch, ring, buffers, false);
647 
648    fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
649            DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
650 
651    clear_state_restore(ctx, ring);
652 
653 dirty:
654    ctx->dirty |= FD_DIRTY_ZSA | FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER |
655                  FD_DIRTY_SAMPLE_MASK | FD_DIRTY_PROG | FD_DIRTY_CONST |
656                  FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
657 
658    ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG;
659    ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |=
660       FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST;
661 
662    return true;
663 }
664 
665 void
fd2_draw_init(struct pipe_context * pctx)666 fd2_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
667 {
668    struct fd_context *ctx = fd_context(pctx);
669    ctx->draw_vbos = fd2_draw_vbos;
670    ctx->clear = fd2_clear;
671 }
672