• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2014 Scott Mansell
3  * Copyright © 2014 Broadcom
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #include "util/u_blitter.h"
26 #include "util/u_prim.h"
27 #include "util/u_format.h"
28 #include "util/u_pack_color.h"
29 #include "util/u_upload_mgr.h"
30 #include "indices/u_primconvert.h"
31 
32 #include "vc4_context.h"
33 #include "vc4_resource.h"
34 
/* Queued draw-call count at which vc4_hw_2116_workaround() submits the
 * current job instead of letting more draws accumulate in it.
 */
#define VC4_HW_2116_COUNT		0x1ef0
36 
37 static void
vc4_get_draw_cl_space(struct vc4_job * job,int vert_count)38 vc4_get_draw_cl_space(struct vc4_job *job, int vert_count)
39 {
40         /* The SW-5891 workaround may cause us to emit multiple shader recs
41          * and draw packets.
42          */
43         int num_draws = DIV_ROUND_UP(vert_count, 65535) + 1;
44 
45         /* Binner gets our packet state -- vc4_emit.c contents,
46          * and the primitive itself.
47          */
48         cl_ensure_space(&job->bcl,
49                         256 + (VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE +
50                                VC4_PACKET_GL_SHADER_STATE_SIZE) * num_draws);
51 
52         /* Nothing for rcl -- that's covered by vc4_context.c */
53 
54         /* shader_rec gets up to 12 dwords of reloc handles plus a maximally
55          * sized shader_rec (104 bytes base for 8 vattrs plus 32 bytes of
56          * vattr stride).
57          */
58         cl_ensure_space(&job->shader_rec,
59                         (12 * sizeof(uint32_t) + 104 + 8 * 32) * num_draws);
60 
61         /* Uniforms are covered by vc4_write_uniforms(). */
62 
63         /* There could be up to 16 textures per stage, plus misc other
64          * pointers.
65          */
66         cl_ensure_space(&job->bo_handles, (2 * 16 + 20) * sizeof(uint32_t));
67         cl_ensure_space(&job->bo_pointers,
68                         (2 * 16 + 20) * sizeof(struct vc4_bo *));
69 }
70 
71 /**
72  * Does the initial bining command list setup for drawing to a given FBO.
73  */
74 static void
vc4_start_draw(struct vc4_context * vc4)75 vc4_start_draw(struct vc4_context *vc4)
76 {
77         struct vc4_job *job = vc4->job;
78 
79         if (job->needs_flush)
80                 return;
81 
82         vc4_get_draw_cl_space(job, 0);
83 
84         struct vc4_cl_out *bcl = cl_start(&job->bcl);
85         //   Tile state data is 48 bytes per tile, I think it can be thrown away
86         //   as soon as binning is finished.
87         cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
88         cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */
89         cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */
90         cl_u32(&bcl, 0); /* tile state addr, filled by kernel */
91         cl_u8(&bcl, job->draw_tiles_x);
92         cl_u8(&bcl, job->draw_tiles_y);
93         /* Other flags are filled by kernel. */
94         cl_u8(&bcl, job->msaa ? VC4_BIN_CONFIG_MS_MODE_4X : 0);
95 
96         /* START_TILE_BINNING resets the statechange counters in the hardware,
97          * which are what is used when a primitive is binned to a tile to
98          * figure out what new state packets need to be written to that tile's
99          * command list.
100          */
101         cl_u8(&bcl, VC4_PACKET_START_TILE_BINNING);
102 
103         /* Reset the current compressed primitives format.  This gets modified
104          * by VC4_PACKET_GL_INDEXED_PRIMITIVE and
105          * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
106          * of every tile.
107          */
108         cl_u8(&bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
109         cl_u8(&bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
110                      VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
111 
112         job->needs_flush = true;
113         job->draw_width = vc4->framebuffer.width;
114         job->draw_height = vc4->framebuffer.height;
115 
116         cl_end(&job->bcl, bcl);
117 }
118 
119 static void
vc4_predraw_check_textures(struct pipe_context * pctx,struct vc4_texture_stateobj * stage_tex)120 vc4_predraw_check_textures(struct pipe_context *pctx,
121                            struct vc4_texture_stateobj *stage_tex)
122 {
123         struct vc4_context *vc4 = vc4_context(pctx);
124 
125         for (int i = 0; i < stage_tex->num_textures; i++) {
126                 struct pipe_sampler_view *view = stage_tex->textures[i];
127                 if (!view)
128                         continue;
129                 struct vc4_resource *rsc = vc4_resource(view->texture);
130                 if (rsc->shadow_parent)
131                         vc4_update_shadow_baselevel_texture(pctx, view);
132 
133                 vc4_flush_jobs_writing_resource(vc4, view->texture);
134         }
135 }
136 
137 static void
vc4_emit_gl_shader_state(struct vc4_context * vc4,const struct pipe_draw_info * info,uint32_t extra_index_bias)138 vc4_emit_gl_shader_state(struct vc4_context *vc4,
139                          const struct pipe_draw_info *info,
140                          uint32_t extra_index_bias)
141 {
142         struct vc4_job *job = vc4->job;
143         /* VC4_DIRTY_VTXSTATE */
144         struct vc4_vertex_stateobj *vtx = vc4->vtx;
145         /* VC4_DIRTY_VTXBUF */
146         struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf;
147 
148         /* The simulator throws a fit if VS or CS don't read an attribute, so
149          * we emit a dummy read.
150          */
151         uint32_t num_elements_emit = MAX2(vtx->num_elements, 1);
152         /* Emit the shader record. */
153         struct vc4_cl_out *shader_rec =
154                 cl_start_shader_reloc(&job->shader_rec, 3 + num_elements_emit);
155         /* VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER */
156         cl_u16(&shader_rec,
157                VC4_SHADER_FLAG_ENABLE_CLIPPING |
158                (vc4->prog.fs->fs_threaded ?
159                 0 : VC4_SHADER_FLAG_FS_SINGLE_THREAD) |
160                ((info->mode == PIPE_PRIM_POINTS &&
161                  vc4->rasterizer->base.point_size_per_vertex) ?
162                 VC4_SHADER_FLAG_VS_POINT_SIZE : 0));
163 
164         /* VC4_DIRTY_COMPILED_FS */
165         cl_u8(&shader_rec, 0); /* fs num uniforms (unused) */
166         cl_u8(&shader_rec, vc4->prog.fs->num_inputs);
167         cl_reloc(job, &job->shader_rec, &shader_rec, vc4->prog.fs->bo, 0);
168         cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
169 
170         /* VC4_DIRTY_COMPILED_VS */
171         cl_u16(&shader_rec, 0); /* vs num uniforms */
172         cl_u8(&shader_rec, vc4->prog.vs->vattrs_live);
173         cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[8]);
174         cl_reloc(job, &job->shader_rec, &shader_rec, vc4->prog.vs->bo, 0);
175         cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
176 
177         /* VC4_DIRTY_COMPILED_CS */
178         cl_u16(&shader_rec, 0); /* cs num uniforms */
179         cl_u8(&shader_rec, vc4->prog.cs->vattrs_live);
180         cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[8]);
181         cl_reloc(job, &job->shader_rec, &shader_rec, vc4->prog.cs->bo, 0);
182         cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
183 
184         uint32_t max_index = 0xffff;
185         for (int i = 0; i < vtx->num_elements; i++) {
186                 struct pipe_vertex_element *elem = &vtx->pipe[i];
187                 struct pipe_vertex_buffer *vb =
188                         &vertexbuf->vb[elem->vertex_buffer_index];
189                 struct vc4_resource *rsc = vc4_resource(vb->buffer);
190                 /* not vc4->dirty tracked: vc4->last_index_bias */
191                 uint32_t offset = (vb->buffer_offset +
192                                    elem->src_offset +
193                                    vb->stride * (info->index_bias +
194                                                  extra_index_bias));
195                 uint32_t vb_size = rsc->bo->size - offset;
196                 uint32_t elem_size =
197                         util_format_get_blocksize(elem->src_format);
198 
199                 cl_reloc(job, &job->shader_rec, &shader_rec, rsc->bo, offset);
200                 cl_u8(&shader_rec, elem_size - 1);
201                 cl_u8(&shader_rec, vb->stride);
202                 cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[i]);
203                 cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[i]);
204 
205                 if (vb->stride > 0) {
206                         max_index = MIN2(max_index,
207                                          (vb_size - elem_size) / vb->stride);
208                 }
209         }
210 
211         if (vtx->num_elements == 0) {
212                 assert(num_elements_emit == 1);
213                 struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO");
214                 cl_reloc(job, &job->shader_rec, &shader_rec, bo, 0);
215                 cl_u8(&shader_rec, 16 - 1); /* element size */
216                 cl_u8(&shader_rec, 0); /* stride */
217                 cl_u8(&shader_rec, 0); /* VS VPM offset */
218                 cl_u8(&shader_rec, 0); /* CS VPM offset */
219                 vc4_bo_unreference(&bo);
220         }
221         cl_end(&job->shader_rec, shader_rec);
222 
223         struct vc4_cl_out *bcl = cl_start(&job->bcl);
224         /* the actual draw call. */
225         cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE);
226         assert(vtx->num_elements <= 8);
227         /* Note that number of attributes == 0 in the packet means 8
228          * attributes.  This field also contains the offset into shader_rec.
229          */
230         cl_u32(&bcl, num_elements_emit & 0x7);
231         cl_end(&job->bcl, bcl);
232 
233         vc4_write_uniforms(vc4, vc4->prog.fs,
234                            &vc4->constbuf[PIPE_SHADER_FRAGMENT],
235                            &vc4->fragtex);
236         vc4_write_uniforms(vc4, vc4->prog.vs,
237                            &vc4->constbuf[PIPE_SHADER_VERTEX],
238                            &vc4->verttex);
239         vc4_write_uniforms(vc4, vc4->prog.cs,
240                            &vc4->constbuf[PIPE_SHADER_VERTEX],
241                            &vc4->verttex);
242 
243         vc4->last_index_bias = info->index_bias + extra_index_bias;
244         vc4->max_index = max_index;
245         job->shader_rec_count++;
246 }
247 
248 /**
249  * HW-2116 workaround: Flush the batch before triggering the hardware state
250  * counter wraparound behavior.
251  *
252  * State updates are tracked by a global counter which increments at the first
253  * state update after a draw or a START_BINNING.  Tiles can then have their
254  * state updated at draw time with a set of cheap checks for whether the
255  * state's copy of the global counter matches the global counter the last time
256  * that state was written to the tile.
257  *
258  * The state counters are relatively small and wrap around quickly, so you
259  * could get false negatives for needing to update a particular state in the
260  * tile.  To avoid this, the hardware attempts to write all of the state in
261  * the tile at wraparound time.  This apparently is broken, so we just flush
262  * everything before that behavior is triggered.  A batch flush is sufficient
263  * to get our current contents drawn and reset the counters to 0.
264  *
265  * Note that we can't just use VC4_PACKET_FLUSH_ALL, because that caps the
266  * tiles with VC4_PACKET_RETURN_FROM_LIST.
267  */
268 static void
vc4_hw_2116_workaround(struct pipe_context * pctx,int vert_count)269 vc4_hw_2116_workaround(struct pipe_context *pctx, int vert_count)
270 {
271         struct vc4_context *vc4 = vc4_context(pctx);
272         struct vc4_job *job = vc4_get_job_for_fbo(vc4);
273 
274         if (job->draw_calls_queued + vert_count / 65535 >= VC4_HW_2116_COUNT) {
275                 perf_debug("Flushing batch due to HW-2116 workaround "
276                            "(too many draw calls per scene\n");
277                 vc4_job_submit(vc4, job);
278         }
279 }
280 
281 static void
vc4_draw_vbo(struct pipe_context * pctx,const struct pipe_draw_info * info)282 vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
283 {
284         struct vc4_context *vc4 = vc4_context(pctx);
285 
286         if (info->mode >= PIPE_PRIM_QUADS) {
287                 util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
288                 util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
289                 util_primconvert_draw_vbo(vc4->primconvert, info);
290                 perf_debug("Fallback conversion for %d %s vertices\n",
291                            info->count, u_prim_name(info->mode));
292                 return;
293         }
294 
295         /* Before setting up the draw, do any fixup blits necessary. */
296         vc4_predraw_check_textures(pctx, &vc4->verttex);
297         vc4_predraw_check_textures(pctx, &vc4->fragtex);
298 
299         vc4_hw_2116_workaround(pctx, info->count);
300 
301         struct vc4_job *job = vc4_get_job_for_fbo(vc4);
302 
303         vc4_get_draw_cl_space(job, info->count);
304 
305         if (vc4->prim_mode != info->mode) {
306                 vc4->prim_mode = info->mode;
307                 vc4->dirty |= VC4_DIRTY_PRIM_MODE;
308         }
309 
310         vc4_start_draw(vc4);
311         if (!vc4_update_compiled_shaders(vc4, info->mode)) {
312                 debug_warn_once("shader compile failed, skipping draw call.\n");
313                 return;
314         }
315 
316         vc4_emit_state(pctx);
317 
318         if ((vc4->dirty & (VC4_DIRTY_VTXBUF |
319                            VC4_DIRTY_VTXSTATE |
320                            VC4_DIRTY_PRIM_MODE |
321                            VC4_DIRTY_RASTERIZER |
322                            VC4_DIRTY_COMPILED_CS |
323                            VC4_DIRTY_COMPILED_VS |
324                            VC4_DIRTY_COMPILED_FS |
325                            vc4->prog.cs->uniform_dirty_bits |
326                            vc4->prog.vs->uniform_dirty_bits |
327                            vc4->prog.fs->uniform_dirty_bits)) ||
328             vc4->last_index_bias != info->index_bias) {
329                 vc4_emit_gl_shader_state(vc4, info, 0);
330         }
331 
332         vc4->dirty = 0;
333 
334         /* Note that the primitive type fields match with OpenGL/gallium
335          * definitions, up to but not including QUADS.
336          */
337         struct vc4_cl_out *bcl = cl_start(&job->bcl);
338         if (info->indexed) {
339                 uint32_t offset = vc4->indexbuf.offset;
340                 uint32_t index_size = vc4->indexbuf.index_size;
341                 struct pipe_resource *prsc;
342                 if (vc4->indexbuf.index_size == 4) {
343                         prsc = vc4_get_shadow_index_buffer(pctx, &vc4->indexbuf,
344                                                            info->count, &offset);
345                         index_size = 2;
346                 } else {
347                         if (vc4->indexbuf.user_buffer) {
348                                 prsc = NULL;
349                                 u_upload_data(vc4->uploader, 0,
350                                               info->count * index_size, 4,
351                                               vc4->indexbuf.user_buffer,
352                                               &offset, &prsc);
353                         } else {
354                                 prsc = vc4->indexbuf.buffer;
355                         }
356                 }
357                 struct vc4_resource *rsc = vc4_resource(prsc);
358 
359                 cl_start_reloc(&job->bcl, &bcl, 1);
360                 cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
361                 cl_u8(&bcl,
362                       info->mode |
363                       (index_size == 2 ?
364                        VC4_INDEX_BUFFER_U16:
365                        VC4_INDEX_BUFFER_U8));
366                 cl_u32(&bcl, info->count);
367                 cl_reloc(job, &job->bcl, &bcl, rsc->bo, offset);
368                 cl_u32(&bcl, vc4->max_index);
369                 job->draw_calls_queued++;
370 
371                 if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer)
372                         pipe_resource_reference(&prsc, NULL);
373         } else {
374                 uint32_t count = info->count;
375                 uint32_t start = info->start;
376                 uint32_t extra_index_bias = 0;
377 
378                 while (count) {
379                         uint32_t this_count = count;
380                         uint32_t step = count;
381                         static const uint32_t max_verts = 65535;
382 
383                         /* GFXH-515 / SW-5891: The binner emits 16 bit indices
384                          * for drawarrays, which means that if start + count >
385                          * 64k it would truncate the top bits.  Work around
386                          * this by emitting a limited number of primitives at
387                          * a time and reemitting the shader state pointing
388                          * farther down the vertex attribute arrays.
389                          *
390                          * To do this properly for line loops or trifans, we'd
391                          * need to make a new VB containing the first vertex
392                          * plus whatever remainder.
393                          */
394                         if (extra_index_bias) {
395                                 cl_end(&job->bcl, bcl);
396                                 vc4_emit_gl_shader_state(vc4, info,
397                                                          extra_index_bias);
398                                 bcl = cl_start(&job->bcl);
399                         }
400 
401                         if (start + count > max_verts) {
402                                 switch (info->mode) {
403                                 case PIPE_PRIM_POINTS:
404                                         this_count = step = max_verts;
405                                         break;
406                                 case PIPE_PRIM_LINES:
407                                         this_count = step = max_verts - (max_verts % 2);
408                                         break;
409                                 case PIPE_PRIM_LINE_STRIP:
410                                         this_count = max_verts;
411                                         step = max_verts - 1;
412                                         break;
413                                 case PIPE_PRIM_LINE_LOOP:
414                                         this_count = max_verts;
415                                         step = max_verts - 1;
416                                         debug_warn_once("unhandled line loop "
417                                                         "looping behavior with "
418                                                         ">65535 verts\n");
419                                         break;
420                                 case PIPE_PRIM_TRIANGLES:
421                                         this_count = step = max_verts - (max_verts % 3);
422                                         break;
423                                 case PIPE_PRIM_TRIANGLE_STRIP:
424                                         this_count = max_verts;
425                                         step = max_verts - 2;
426                                         break;
427                                 default:
428                                         debug_warn_once("unhandled primitive "
429                                                         "max vert count, truncating\n");
430                                         this_count = step = max_verts;
431                                 }
432                         }
433 
434                         cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
435                         cl_u8(&bcl, info->mode);
436                         cl_u32(&bcl, this_count);
437                         cl_u32(&bcl, start);
438                         job->draw_calls_queued++;
439 
440                         count -= step;
441                         extra_index_bias += start + step;
442                         start = 0;
443                 }
444         }
445         cl_end(&job->bcl, bcl);
446 
447         /* We shouldn't have tripped the HW_2116 bug with the GFXH-515
448          * workaround.
449          */
450         assert(job->draw_calls_queued <= VC4_HW_2116_COUNT);
451 
452         if (vc4->zsa && vc4->framebuffer.zsbuf) {
453                 struct vc4_resource *rsc =
454                         vc4_resource(vc4->framebuffer.zsbuf->texture);
455 
456                 if (vc4->zsa->base.depth.enabled) {
457                         job->resolve |= PIPE_CLEAR_DEPTH;
458                         rsc->initialized_buffers = PIPE_CLEAR_DEPTH;
459                 }
460 
461                 if (vc4->zsa->base.stencil[0].enabled) {
462                         job->resolve |= PIPE_CLEAR_STENCIL;
463                         rsc->initialized_buffers |= PIPE_CLEAR_STENCIL;
464                 }
465         }
466 
467         job->resolve |= PIPE_CLEAR_COLOR0;
468 
469         /* If we've used half of the presumably 256MB CMA area, flush the job
470          * so that we don't accumulate a job that will end up not being
471          * executable.
472          */
473         if (job->bo_space > 128 * 1024 * 1024)
474                 vc4_flush(pctx);
475 
476         if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH)
477                 vc4_flush(pctx);
478 }
479 
480 static uint32_t
pack_rgba(enum pipe_format format,const float * rgba)481 pack_rgba(enum pipe_format format, const float *rgba)
482 {
483         union util_color uc;
484         util_pack_color(rgba, format, &uc);
485         if (util_format_get_blocksize(format) == 2)
486                 return uc.us;
487         else
488                 return uc.ui[0];
489 }
490 
491 static void
vc4_clear(struct pipe_context * pctx,unsigned buffers,const union pipe_color_union * color,double depth,unsigned stencil)492 vc4_clear(struct pipe_context *pctx, unsigned buffers,
493           const union pipe_color_union *color, double depth, unsigned stencil)
494 {
495         struct vc4_context *vc4 = vc4_context(pctx);
496         struct vc4_job *job = vc4_get_job_for_fbo(vc4);
497 
498         /* We can't flag new buffers for clearing once we've queued draws.  We
499          * could avoid this by using the 3d engine to clear.
500          */
501         if (job->draw_calls_queued) {
502                 perf_debug("Flushing rendering to process new clear.\n");
503                 vc4_job_submit(vc4, job);
504                 job = vc4_get_job_for_fbo(vc4);
505         }
506 
507         if (buffers & PIPE_CLEAR_COLOR0) {
508                 struct vc4_resource *rsc =
509                         vc4_resource(vc4->framebuffer.cbufs[0]->texture);
510                 uint32_t clear_color;
511 
512                 if (vc4_rt_format_is_565(vc4->framebuffer.cbufs[0]->format)) {
513                         /* In 565 mode, the hardware will be packing our color
514                          * for us.
515                          */
516                         clear_color = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM,
517                                                 color->f);
518                 } else {
519                         /* Otherwise, we need to do this packing because we
520                          * support multiple swizzlings of RGBA8888.
521                          */
522                         clear_color =
523                                 pack_rgba(vc4->framebuffer.cbufs[0]->format,
524                                           color->f);
525                 }
526                 job->clear_color[0] = job->clear_color[1] = clear_color;
527                 rsc->initialized_buffers |= (buffers & PIPE_CLEAR_COLOR0);
528         }
529 
530         if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
531                 struct vc4_resource *rsc =
532                         vc4_resource(vc4->framebuffer.zsbuf->texture);
533                 unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL;
534 
535                 /* Clearing ZS will clear both Z and stencil, so if we're
536                  * trying to clear just one then we need to draw a quad to do
537                  * it instead.
538                  */
539                 if ((zsclear == PIPE_CLEAR_DEPTH ||
540                      zsclear == PIPE_CLEAR_STENCIL) &&
541                     (rsc->initialized_buffers & ~(zsclear | job->cleared)) &&
542                     util_format_is_depth_and_stencil(vc4->framebuffer.zsbuf->format)) {
543                         perf_debug("Partial clear of Z+stencil buffer, "
544                                    "drawing a quad instead of fast clearing\n");
545                         vc4_blitter_save(vc4);
546                         util_blitter_clear(vc4->blitter,
547                                            vc4->framebuffer.width,
548                                            vc4->framebuffer.height,
549                                            1,
550                                            zsclear,
551                                            NULL, depth, stencil);
552                         buffers &= ~zsclear;
553                         if (!buffers)
554                                 return;
555                 }
556 
557                 /* Though the depth buffer is stored with Z in the high 24,
558                  * for this field we just need to store it in the low 24.
559                  */
560                 if (buffers & PIPE_CLEAR_DEPTH) {
561                         job->clear_depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
562                                                        depth);
563                 }
564                 if (buffers & PIPE_CLEAR_STENCIL)
565                         job->clear_stencil = stencil;
566 
567                 rsc->initialized_buffers |= zsclear;
568         }
569 
570         job->draw_min_x = 0;
571         job->draw_min_y = 0;
572         job->draw_max_x = vc4->framebuffer.width;
573         job->draw_max_y = vc4->framebuffer.height;
574         job->cleared |= buffers;
575         job->resolve |= buffers;
576 
577         vc4_start_draw(vc4);
578 }
579 
/**
 * Placeholder for pctx->clear_render_target: not implemented, all arguments
 * are ignored and a notice is printed to stderr.
 */
static void
vc4_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
                        const union pipe_color_union *color,
                        unsigned x, unsigned y, unsigned w, unsigned h,
                        bool render_condition_enabled)
{
        fputs("unimpl: clear RT\n", stderr);
}
588 
/**
 * Placeholder for pctx->clear_depth_stencil: not implemented, all arguments
 * are ignored and a notice is printed to stderr.
 */
static void
vc4_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
                        unsigned buffers, double depth, unsigned stencil,
                        unsigned x, unsigned y, unsigned w, unsigned h,
                        bool render_condition_enabled)
{
        fputs("unimpl: clear DS\n", stderr);
}
597 
598 void
vc4_draw_init(struct pipe_context * pctx)599 vc4_draw_init(struct pipe_context *pctx)
600 {
601         pctx->draw_vbo = vc4_draw_vbo;
602         pctx->clear = vc4_clear;
603         pctx->clear_render_target = vc4_clear_render_target;
604         pctx->clear_depth_stencil = vc4_clear_depth_stencil;
605 }
606