• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <robclark@freedesktop.org>
25  */
26 
27 #include "pipe/p_state.h"
28 #include "util/u_memory.h"
29 #include "util/u_prim.h"
30 #include "util/u_string.h"
31 
32 #include "freedreno_resource.h"
33 #include "freedreno_state.h"
34 
35 #include "fd2_context.h"
36 #include "fd2_draw.h"
37 #include "fd2_emit.h"
38 #include "fd2_program.h"
39 #include "fd2_util.h"
40 #include "fd2_zsa.h"
41 
42 static void
emit_cacheflush(struct fd_ringbuffer * ring)43 emit_cacheflush(struct fd_ringbuffer *ring)
44 {
45    unsigned i;
46 
47    for (i = 0; i < 12; i++) {
48       OUT_PKT3(ring, CP_EVENT_WRITE, 1);
49       OUT_RING(ring, CACHE_FLUSH);
50    }
51 }
52 
53 static void
emit_vertexbufs(struct fd_context * ctx)54 emit_vertexbufs(struct fd_context *ctx) assert_dt
55 {
56    struct fd_vertex_stateobj *vtx = ctx->vtx.vtx;
57    struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vtx.vertexbuf;
58    struct fd2_vertex_buf bufs[PIPE_MAX_ATTRIBS];
59    unsigned i;
60 
61    if (!vtx->num_elements)
62       return;
63 
64    for (i = 0; i < vtx->num_elements; i++) {
65       struct pipe_vertex_element *elem = &vtx->pipe[i];
66       struct pipe_vertex_buffer *vb = &vertexbuf->vb[elem->vertex_buffer_index];
67       bufs[i].offset = vb->buffer_offset;
68       bufs[i].size = fd_bo_size(fd_resource(vb->buffer.resource)->bo);
69       bufs[i].prsc = vb->buffer.resource;
70    }
71 
72    // NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the
73    // CONST(20,0) (or CONST(26,0) in soliv_vp)
74 
75    fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements);
76    fd2_emit_vertex_bufs(ctx->batch->binning, 0x78, bufs, vtx->num_elements);
77 }
78 
79 static void
draw_impl(struct fd_context * ctx,const struct pipe_draw_info * info,const struct pipe_draw_start_count_bias * draw,struct fd_ringbuffer * ring,unsigned index_offset,bool binning)80 draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
81           const struct pipe_draw_start_count_bias *draw, struct fd_ringbuffer *ring,
82           unsigned index_offset, bool binning) assert_dt
83 {
84    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
85    OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
86    OUT_RING(ring, info->index_size ? 0 : draw->start);
87 
88    OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
89    OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
90 
91    if (is_a20x(ctx->screen)) {
92       /* wait for DMA to finish and
93        * dummy draw one triangle with indexes 0,0,0.
94        * with PRE_FETCH_CULL_ENABLE | GRP_CULL_ENABLE.
95        *
96        * this workaround is for a HW bug related to DMA alignment:
97        * it is necessary for indexed draws and possibly also
98        * draws that read binning data
99        */
100       OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
101       OUT_RING(ring, 0x000005d0); /* RBBM_STATUS */
102       OUT_RING(ring, 0x00000000);
103       OUT_RING(ring, 0x00001000); /* bit: 12: VGT_BUSY_NO_DMA */
104       OUT_RING(ring, 0x00000001);
105 
106       OUT_PKT3(ring, CP_DRAW_INDX_BIN, 6);
107       OUT_RING(ring, 0x00000000);
108       OUT_RING(ring, 0x0003c004);
109       OUT_RING(ring, 0x00000000);
110       OUT_RING(ring, 0x00000003);
111       OUT_RELOC(ring, fd_resource(fd2_context(ctx)->solid_vertexbuf)->bo, 64, 0,
112                 0);
113       OUT_RING(ring, 0x00000006);
114    } else {
115       OUT_WFI(ring);
116 
117       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
118       OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
119       OUT_RING(ring, info->index_bounds_valid ? info->max_index
120                                               : ~0); /* VGT_MAX_VTX_INDX */
121       OUT_RING(ring, info->index_bounds_valid ? info->min_index
122                                               : 0); /* VGT_MIN_VTX_INDX */
123    }
124 
125    /* binning shader will take offset from C64 */
126    if (binning && is_a20x(ctx->screen)) {
127       OUT_PKT3(ring, CP_SET_CONSTANT, 5);
128       OUT_RING(ring, 0x00000180);
129       OUT_RING(ring, fui(ctx->batch->num_vertices));
130       OUT_RING(ring, fui(0.0f));
131       OUT_RING(ring, fui(0.0f));
132       OUT_RING(ring, fui(0.0f));
133    }
134 
135    enum pc_di_vis_cull_mode vismode = USE_VISIBILITY;
136    if (binning || info->mode == PIPE_PRIM_POINTS)
137       vismode = IGNORE_VISIBILITY;
138 
139    fd_draw_emit(ctx->batch, ring, ctx->screen->primtypes[info->mode],
140                 vismode, info, draw, index_offset);
141 
142    if (is_a20x(ctx->screen)) {
143       /* not sure why this is required, but it fixes some hangs */
144       OUT_WFI(ring);
145    } else {
146       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
147       OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
148       OUT_RING(ring, 0x00000000);
149    }
150 
151    emit_cacheflush(ring);
152 }
153 
154 static bool
fd2_draw_vbo(struct fd_context * ctx,const struct pipe_draw_info * pinfo,unsigned drawid_offset,const struct pipe_draw_indirect_info * indirect,const struct pipe_draw_start_count_bias * pdraw,unsigned index_offset)155 fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,
156 			 unsigned drawid_offset,
157              const struct pipe_draw_indirect_info *indirect,
158              const struct pipe_draw_start_count_bias *pdraw,
159              unsigned index_offset) assert_dt
160 {
161    if (!ctx->prog.fs || !ctx->prog.vs)
162       return false;
163 
164    if (pinfo->mode != PIPE_PRIM_MAX && !indirect && !pinfo->primitive_restart &&
165        !u_trim_pipe_prim(pinfo->mode, (unsigned *)&pdraw->count))
166       return false;
167 
168    if (ctx->dirty & FD_DIRTY_VTXBUF)
169       emit_vertexbufs(ctx);
170 
171    if (fd_binning_enabled)
172       fd2_emit_state_binning(ctx, ctx->dirty);
173 
174    fd2_emit_state(ctx, ctx->dirty);
175 
176    /* a2xx can draw only 65535 vertices at once
177     * on a22x the field in the draw command is 32bits but seems limited too
178     * using a limit of 32k because it fixes an unexplained hang
179     * 32766 works for all primitives (multiple of 2 and 3)
180     */
181    if (pdraw->count > 32766) {
182       /* clang-format off */
183       static const uint16_t step_tbl[PIPE_PRIM_MAX] = {
184          [0 ... PIPE_PRIM_MAX - 1]  = 32766,
185          [PIPE_PRIM_LINE_STRIP]     = 32765,
186          [PIPE_PRIM_TRIANGLE_STRIP] = 32764,
187 
188          /* needs more work */
189          [PIPE_PRIM_TRIANGLE_FAN]   = 0,
190          [PIPE_PRIM_LINE_LOOP]      = 0,
191       };
192       /* clang-format on */
193 
194 		struct pipe_draw_start_count_bias draw = *pdraw;
195       unsigned count = draw.count;
196       unsigned step = step_tbl[pinfo->mode];
197       unsigned num_vertices = ctx->batch->num_vertices;
198 
199       if (!step)
200          return false;
201 
202       for (; count + step > 32766; count -= step) {
203          draw.count = MIN2(count, 32766);
204          draw_impl(ctx, pinfo, &draw, ctx->batch->draw, index_offset, false);
205          draw_impl(ctx, pinfo, &draw, ctx->batch->binning, index_offset, true);
206          draw.start += step;
207          ctx->batch->num_vertices += step;
208       }
209       /* changing this value is a hack, restore it */
210       ctx->batch->num_vertices = num_vertices;
211    } else {
212       draw_impl(ctx, pinfo, pdraw, ctx->batch->draw, index_offset, false);
213       draw_impl(ctx, pinfo, pdraw, ctx->batch->binning, index_offset, true);
214    }
215 
216    fd_context_all_clean(ctx);
217 
218    return true;
219 }
220 
221 static void
clear_state(struct fd_batch * batch,struct fd_ringbuffer * ring,unsigned buffers,bool fast_clear)222 clear_state(struct fd_batch *batch, struct fd_ringbuffer *ring,
223             unsigned buffers, bool fast_clear) assert_dt
224 {
225    struct fd_context *ctx = batch->ctx;
226    struct fd2_context *fd2_ctx = fd2_context(ctx);
227    uint32_t reg;
228 
229    fd2_emit_vertex_bufs(ring, 0x9c,
230                         (struct fd2_vertex_buf[]){
231                            {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
232                         },
233                         1);
234 
235    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
236    OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
237    OUT_RING(ring, 0);
238 
239    fd2_program_emit(ctx, ring, &ctx->solid_prog);
240 
241    OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
242    OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
243 
244    if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
245       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
246       OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
247       reg = 0;
248       if (buffers & PIPE_CLEAR_DEPTH) {
249          reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) |
250                 A2XX_RB_DEPTHCONTROL_Z_ENABLE |
251                 A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE |
252                 A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
253       }
254       if (buffers & PIPE_CLEAR_STENCIL) {
255          reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) |
256                 A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
257                 A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);
258       }
259       OUT_RING(ring, reg);
260    }
261 
262    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
263    OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
264    OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
265                      A2XX_RB_COLORCONTROL_BLEND_DISABLE |
266                      A2XX_RB_COLORCONTROL_ROP_CODE(12) |
267                      A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
268                      A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
269 
270    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
271    OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
272    OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */
273    OUT_RING(
274       ring,
275       A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
276          A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
277          A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES) |
278          (fast_clear ? A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE : 0));
279 
280    if (fast_clear) {
281       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
282       OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
283       OUT_RING(ring, A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(3));
284    }
285 
286    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
287    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
288    OUT_RING(ring, 0x0000ffff);
289 
290    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
291    OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
292    if (buffers & PIPE_CLEAR_COLOR) {
293       OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |
294                         A2XX_RB_COLOR_MASK_WRITE_GREEN |
295                         A2XX_RB_COLOR_MASK_WRITE_BLUE |
296                         A2XX_RB_COLOR_MASK_WRITE_ALPHA);
297    } else {
298       OUT_RING(ring, 0x0);
299    }
300 
301    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
302    OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
303    OUT_RING(ring, 0);
304 
305    if (is_a20x(batch->ctx->screen))
306       return;
307 
308    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
309    OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
310    OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
311    OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
312 
313    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
314    OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
315    OUT_RING(ring,
316             0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
317    OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
318 
319    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
320    OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
321    OUT_RING(ring, 0x00000084);
322 
323    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
324    OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
325    OUT_RING(ring, 0x0000028f);
326 }
327 
328 static void
clear_state_restore(struct fd_context * ctx,struct fd_ringbuffer * ring)329 clear_state_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
330 {
331    if (is_a20x(ctx->screen))
332       return;
333 
334    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
335    OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
336    OUT_RING(ring, 0x00000000);
337 
338    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
339    OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
340    OUT_RING(ring, 0x00000000);
341 
342    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
343    OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
344    OUT_RING(ring, 0x0000003b);
345 }
346 
347 static void
clear_fast(struct fd_batch * batch,struct fd_ringbuffer * ring,uint32_t color_clear,uint32_t depth_clear,unsigned patch_type)348 clear_fast(struct fd_batch *batch, struct fd_ringbuffer *ring,
349            uint32_t color_clear, uint32_t depth_clear, unsigned patch_type)
350 {
351    BEGIN_RING(ring, 8); /* preallocate next 2 packets (for patching) */
352 
353    /* zero values are patched in */
354    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
355    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
356    OUT_RINGP(ring, patch_type, &batch->gmem_patches);
357 
358    OUT_PKT3(ring, CP_SET_CONSTANT, 4);
359    OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
360    OUT_RING(ring, 0x8000 | 32);
361    OUT_RING(ring, 0);
362    OUT_RING(ring, 0);
363 
364    /* set fill values */
365    if (!is_a20x(batch->ctx->screen)) {
366       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
367       OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
368       OUT_RING(ring, color_clear);
369 
370       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
371       OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
372       OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
373                         A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf));
374 
375       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
376       OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
377       OUT_RING(ring, depth_clear);
378    } else {
379       const float sc = 1.0f / 255.0f;
380 
381       OUT_PKT3(ring, CP_SET_CONSTANT, 5);
382       OUT_RING(ring, 0x00000480);
383       OUT_RING(ring, fui((float)(color_clear >> 0 & 0xff) * sc));
384       OUT_RING(ring, fui((float)(color_clear >> 8 & 0xff) * sc));
385       OUT_RING(ring, fui((float)(color_clear >> 16 & 0xff) * sc));
386       OUT_RING(ring, fui((float)(color_clear >> 24 & 0xff) * sc));
387 
388       // XXX if using float the rounding error breaks it..
389       float depth = ((double)(depth_clear >> 8)) * (1.0 / (double)0xffffff);
390       assert((unsigned)(((double)depth * (double)0xffffff)) ==
391              (depth_clear >> 8));
392 
393       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
394       OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
395       OUT_RING(ring, fui(0.0f));
396       OUT_RING(ring, fui(depth));
397 
398       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
399       OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
400       OUT_RING(ring,
401                0xff000000 |
402                   A2XX_RB_STENCILREFMASK_BF_STENCILREF(depth_clear & 0xff) |
403                   A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
404       OUT_RING(ring, 0xff000000 |
405                         A2XX_RB_STENCILREFMASK_STENCILREF(depth_clear & 0xff) |
406                         A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
407    }
408 
409    fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
410            DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
411 }
412 
413 static bool
fd2_clear_fast(struct fd_context * ctx,unsigned buffers,const union pipe_color_union * color,double depth,unsigned stencil)414 fd2_clear_fast(struct fd_context *ctx, unsigned buffers,
415                const union pipe_color_union *color, double depth,
416                unsigned stencil) assert_dt
417 {
418    /* using 4x MSAA allows clearing ~2x faster
419     * then we can use higher bpp clearing to clear lower bpp
420     * 1 "pixel" can clear 64 bits (rgba8+depth24+stencil8)
421     * note: its possible to clear with 32_32_32_32 format but its not faster
422     * note: fast clear doesn't work with sysmem rendering
423     * (sysmem rendering is disabled when clear is used)
424     *
425     * we only have 16-bit / 32-bit color formats
426     * and 16-bit / 32-bit depth formats
427     * so there are only a few possible combinations
428     *
429     * if the bpp of the color/depth doesn't match
430     * we clear with depth/color individually
431     */
432    struct fd2_context *fd2_ctx = fd2_context(ctx);
433    struct fd_batch *batch = ctx->batch;
434    struct fd_ringbuffer *ring = batch->draw;
435    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
436    uint32_t color_clear = 0, depth_clear = 0;
437    enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
438    int depth_size = -1; /* -1: no clear, 0: clear 16-bit, 1: clear 32-bit */
439    int color_size = -1;
440 
441    /* TODO: need to test performance on a22x */
442    if (!is_a20x(ctx->screen))
443       return false;
444 
445    if (buffers & PIPE_CLEAR_COLOR)
446       color_size = util_format_get_blocksizebits(format) == 32;
447 
448    if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
449       /* no fast clear when clearing only one component of depth+stencil buffer */
450       if (!(buffers & PIPE_CLEAR_DEPTH))
451          return false;
452 
453       if ((pfb->zsbuf->format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
454            pfb->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) &&
455           !(buffers & PIPE_CLEAR_STENCIL))
456          return false;
457 
458       depth_size = fd_pipe2depth(pfb->zsbuf->format) == DEPTHX_24_8;
459    }
460 
461    assert(color_size >= 0 || depth_size >= 0);
462 
463    if (color_size == 0) {
464       color_clear = pack_rgba(format, color->f);
465       color_clear = (color_clear << 16) | (color_clear & 0xffff);
466    } else if (color_size == 1) {
467       color_clear = pack_rgba(format, color->f);
468    }
469 
470    if (depth_size == 0) {
471       depth_clear = (uint32_t)(0xffff * depth);
472       depth_clear |= depth_clear << 16;
473    } else if (depth_size == 1) {
474       depth_clear = (((uint32_t)(0xffffff * depth)) << 8);
475       depth_clear |= (stencil & 0xff);
476    }
477 
478    /* disable "window" scissor.. */
479    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
480    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
481    OUT_RING(ring, xy2d(0, 0));
482    OUT_RING(ring, xy2d(0x7fff, 0x7fff));
483 
484    /* make sure we fill all "pixels" (in SCREEN_SCISSOR) */
485    OUT_PKT3(ring, CP_SET_CONSTANT, 5);
486    OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
487    OUT_RING(ring, fui(4096.0f));
488    OUT_RING(ring, fui(4096.0f));
489    OUT_RING(ring, fui(4096.0f));
490    OUT_RING(ring, fui(4096.0f));
491 
492    clear_state(batch, ring, ~0u, true);
493 
494    if (color_size >= 0 && depth_size != color_size)
495       clear_fast(batch, ring, color_clear, color_clear,
496                  GMEM_PATCH_FASTCLEAR_COLOR);
497 
498    if (depth_size >= 0 && depth_size != color_size)
499       clear_fast(batch, ring, depth_clear, depth_clear,
500                  GMEM_PATCH_FASTCLEAR_DEPTH);
501 
502    if (depth_size == color_size)
503       clear_fast(batch, ring, color_clear, depth_clear,
504                  GMEM_PATCH_FASTCLEAR_COLOR_DEPTH);
505 
506    clear_state_restore(ctx, ring);
507 
508    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
509    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
510    OUT_RING(ring, 0);
511 
512    /* can't patch in SCREEN_SCISSOR_BR as it can be different for each tile.
513     * MEM_WRITE the value in tile_renderprep, and use CP_LOAD_CONSTANT_CONTEXT
514     * the value is read from byte offset 60 in the given bo
515     */
516    OUT_PKT3(ring, CP_LOAD_CONSTANT_CONTEXT, 3);
517    OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0, 0, 0);
518    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
519    OUT_RING(ring, 1);
520 
521    OUT_PKT3(ring, CP_SET_CONSTANT, 4);
522    OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
523    OUT_RINGP(ring, GMEM_PATCH_RESTORE_INFO, &batch->gmem_patches);
524    OUT_RING(ring, 0);
525    OUT_RING(ring, 0);
526    return true;
527 }
528 
529 static bool
fd2_clear(struct fd_context * ctx,unsigned buffers,const union pipe_color_union * color,double depth,unsigned stencil)530 fd2_clear(struct fd_context *ctx, unsigned buffers,
531           const union pipe_color_union *color, double depth,
532           unsigned stencil) assert_dt
533 {
534    struct fd_ringbuffer *ring = ctx->batch->draw;
535    struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer;
536 
537    if (fd2_clear_fast(ctx, buffers, color, depth, stencil))
538       goto dirty;
539 
540    /* set clear value */
541    if (is_a20x(ctx->screen)) {
542       if (buffers & PIPE_CLEAR_COLOR) {
543          /* C0 used by fragment shader */
544          OUT_PKT3(ring, CP_SET_CONSTANT, 5);
545          OUT_RING(ring, 0x00000480);
546          OUT_RING(ring, color->ui[0]);
547          OUT_RING(ring, color->ui[1]);
548          OUT_RING(ring, color->ui[2]);
549          OUT_RING(ring, color->ui[3]);
550       }
551 
552       if (buffers & PIPE_CLEAR_DEPTH) {
553          /* use viewport to set depth value */
554          OUT_PKT3(ring, CP_SET_CONSTANT, 3);
555          OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
556          OUT_RING(ring, fui(0.0f));
557          OUT_RING(ring, fui(depth));
558       }
559 
560       if (buffers & PIPE_CLEAR_STENCIL) {
561          OUT_PKT3(ring, CP_SET_CONSTANT, 3);
562          OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
563          OUT_RING(ring, 0xff000000 |
564                            A2XX_RB_STENCILREFMASK_BF_STENCILREF(stencil) |
565                            A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
566          OUT_RING(ring, 0xff000000 |
567                            A2XX_RB_STENCILREFMASK_STENCILREF(stencil) |
568                            A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
569       }
570    } else {
571       if (buffers & PIPE_CLEAR_COLOR) {
572          OUT_PKT3(ring, CP_SET_CONSTANT, 2);
573          OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
574          OUT_RING(ring, pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f));
575       }
576 
577       if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
578          uint32_t clear_mask, depth_clear;
579          switch (fd_pipe2depth(fb->zsbuf->format)) {
580          case DEPTHX_24_8:
581             clear_mask = ((buffers & PIPE_CLEAR_DEPTH) ? 0xe : 0) |
582                          ((buffers & PIPE_CLEAR_STENCIL) ? 0x1 : 0);
583             depth_clear =
584                (((uint32_t)(0xffffff * depth)) << 8) | (stencil & 0xff);
585             break;
586          case DEPTHX_16:
587             clear_mask = 0xf;
588             depth_clear = (uint32_t)(0xffffffff * depth);
589             break;
590          default:
591             unreachable("invalid depth");
592             break;
593          }
594 
595          OUT_PKT3(ring, CP_SET_CONSTANT, 2);
596          OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
597          OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
598                            A2XX_RB_COPY_CONTROL_CLEAR_MASK(clear_mask));
599 
600          OUT_PKT3(ring, CP_SET_CONSTANT, 2);
601          OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
602          OUT_RING(ring, depth_clear);
603       }
604    }
605 
606    /* scissor state */
607    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
608    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
609    OUT_RING(ring, xy2d(0, 0));
610    OUT_RING(ring, xy2d(fb->width, fb->height));
611 
612    /* viewport state */
613    OUT_PKT3(ring, CP_SET_CONSTANT, 5);
614    OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
615    OUT_RING(ring, fui((float)fb->width / 2.0f));
616    OUT_RING(ring, fui((float)fb->width / 2.0f));
617    OUT_RING(ring, fui((float)fb->height / 2.0f));
618    OUT_RING(ring, fui((float)fb->height / 2.0f));
619 
620    /* common state */
621    clear_state(ctx->batch, ring, buffers, false);
622 
623    fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
624            DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
625 
626    clear_state_restore(ctx, ring);
627 
628 dirty:
629    ctx->dirty |= FD_DIRTY_ZSA | FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER |
630                  FD_DIRTY_SAMPLE_MASK | FD_DIRTY_PROG | FD_DIRTY_CONST |
631                  FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
632 
633    ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG;
634    ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |=
635       FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST;
636 
637    return true;
638 }
639 
640 void
fd2_draw_init(struct pipe_context * pctx)641 fd2_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
642 {
643    struct fd_context *ctx = fd_context(pctx);
644    ctx->draw_vbo = fd2_draw_vbo;
645    ctx->clear = fd2_clear;
646 }
647