• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir/pipe_nir.h"
25 #include "util/format/u_format.h"
26 #include "util/perf/cpu_trace.h"
27 #include "util/u_surface.h"
28 #include "util/u_blitter.h"
29 #include "compiler/nir/nir_builder.h"
30 #include "v3d_context.h"
31 #include "broadcom/common/v3d_tiling.h"
32 #include "broadcom/common/v3d_tfu.h"
33 
34 /**
35  * The param @op_blit is used to tell if we are saving state for blitter_blit
36  * (if true) or blitter_clear (if false). If other blitter functions are used
37  * that require different state we may need something more elaborated than
38  * this.
39  */
40 
41 void
v3d_blitter_save(struct v3d_context * v3d,enum v3d_blitter_op op)42 v3d_blitter_save(struct v3d_context *v3d, enum v3d_blitter_op op)
43 {
44         util_blitter_save_fragment_constant_buffer_slot(v3d->blitter,
45                                                         v3d->constbuf[PIPE_SHADER_FRAGMENT].cb);
46         util_blitter_save_vertex_buffers(v3d->blitter, v3d->vertexbuf.vb, v3d->vertexbuf.count);
47         util_blitter_save_vertex_elements(v3d->blitter, v3d->vtx);
48         util_blitter_save_vertex_shader(v3d->blitter, v3d->prog.bind_vs);
49         util_blitter_save_geometry_shader(v3d->blitter, v3d->prog.bind_gs);
50         util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
51                                      v3d->streamout.targets, MESA_PRIM_UNKNOWN);
52         util_blitter_save_rasterizer(v3d->blitter, v3d->rasterizer);
53         util_blitter_save_viewport(v3d->blitter, &v3d->viewport);
54         util_blitter_save_fragment_shader(v3d->blitter, v3d->prog.bind_fs);
55         util_blitter_save_blend(v3d->blitter, v3d->blend);
56         util_blitter_save_depth_stencil_alpha(v3d->blitter, v3d->zsa);
57         util_blitter_save_stencil_ref(v3d->blitter, &v3d->stencil_ref);
58         util_blitter_save_sample_mask(v3d->blitter, v3d->sample_mask, 0);
59         util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
60                                      v3d->streamout.targets, MESA_PRIM_UNKNOWN);
61 
62         if (op & V3D_SAVE_FRAMEBUFFER)
63                 util_blitter_save_framebuffer(v3d->blitter, &v3d->framebuffer);
64 
65         if (op & V3D_SAVE_TEXTURES) {
66                 util_blitter_save_scissor(v3d->blitter, &v3d->scissor);
67                 util_blitter_save_fragment_sampler_states(v3d->blitter,
68                                                           v3d->tex[PIPE_SHADER_FRAGMENT].num_samplers,
69                                                           (void **)v3d->tex[PIPE_SHADER_FRAGMENT].samplers);
70                 util_blitter_save_fragment_sampler_views(v3d->blitter,
71                                                          v3d->tex[PIPE_SHADER_FRAGMENT].num_textures,
72                                                          v3d->tex[PIPE_SHADER_FRAGMENT].textures);
73         }
74 
75         if (!(op & V3D_DISABLE_RENDER_COND)) {
76                 util_blitter_save_render_condition(v3d->blitter, v3d->cond_query,
77                                                    v3d->cond_cond, v3d->cond_mode);
78         }
79 }
80 
81 static void
v3d_render_blit(struct pipe_context * ctx,struct pipe_blit_info * info)82 v3d_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
83 {
84         struct v3d_context *v3d = v3d_context(ctx);
85         struct v3d_resource *src = v3d_resource(info->src.resource);
86         struct pipe_resource *tiled = NULL;
87 
88         if (!info->mask)
89                 return;
90 
91         if (!src->tiled &&
92             info->src.resource->target != PIPE_TEXTURE_1D &&
93             info->src.resource->target != PIPE_TEXTURE_1D_ARRAY) {
94                 struct pipe_box box = {
95                         .x = 0,
96                         .y = 0,
97                         .width = u_minify(info->src.resource->width0,
98                                            info->src.level),
99                         .height = u_minify(info->src.resource->height0,
100                                            info->src.level),
101                         .depth = 1,
102                 };
103                 struct pipe_resource tmpl = {
104                         .target = info->src.resource->target,
105                         .format = info->src.resource->format,
106                         .width0 = box.width,
107                         .height0 = box.height,
108                         .depth0 = 1,
109                         .array_size = 1,
110                 };
111                 tiled = ctx->screen->resource_create(ctx->screen, &tmpl);
112                 if (!tiled) {
113                         fprintf(stderr, "Failed to create tiled blit temp\n");
114                         return;
115                 }
116                 ctx->resource_copy_region(ctx,
117                                           tiled, 0,
118                                           0, 0, 0,
119                                           info->src.resource, info->src.level,
120                                           &box);
121                 info->src.level = 0;
122                 info->src.resource = tiled;
123         }
124 
125         if (!util_blitter_is_blit_supported(v3d->blitter, info)) {
126                 fprintf(stderr, "blit unsupported %s -> %s\n",
127                     util_format_short_name(info->src.format),
128                     util_format_short_name(info->dst.format));
129                 return;
130         }
131 
132         MESA_TRACE_FUNC();
133 
134         v3d_blitter_save(v3d, info->render_condition_enable ?
135                          V3D_BLIT_COND : V3D_BLIT);
136         util_blitter_blit(v3d->blitter, info, NULL);
137 
138         pipe_resource_reference(&tiled, NULL);
139         info->mask = 0;
140 }
141 
142 /* Implement stencil blits by reinterpreting the stencil data as an RGBA8888
143  * or R8 texture.
144  */
145 static void
v3d_stencil_blit(struct pipe_context * ctx,struct pipe_blit_info * info)146 v3d_stencil_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
147 {
148         struct v3d_context *v3d = v3d_context(ctx);
149         struct v3d_resource *src = v3d_resource(info->src.resource);
150         struct v3d_resource *dst = v3d_resource(info->dst.resource);
151         enum pipe_format src_format, dst_format;
152 
153         if ((info->mask & PIPE_MASK_S) == 0)
154                 return;
155 
156         MESA_TRACE_FUNC();
157 
158         if (src->separate_stencil) {
159                 src = src->separate_stencil;
160                 src_format = PIPE_FORMAT_R8_UINT;
161         } else {
162                 src_format = PIPE_FORMAT_RGBA8888_UINT;
163         }
164 
165         if (dst->separate_stencil) {
166                 dst = dst->separate_stencil;
167                 dst_format = PIPE_FORMAT_R8_UINT;
168         } else {
169                 dst_format = PIPE_FORMAT_RGBA8888_UINT;
170         }
171 
172         /* Initialize the surface. */
173         struct pipe_surface dst_tmpl = {
174                 .u.tex = {
175                         .level = info->dst.level,
176                         .first_layer = info->dst.box.z,
177                         .last_layer = info->dst.box.z,
178                 },
179                 .format = dst_format,
180         };
181         struct pipe_surface *dst_surf =
182                 ctx->create_surface(ctx, &dst->base, &dst_tmpl);
183 
184         /* Initialize the sampler view. */
185         struct pipe_sampler_view src_tmpl = {
186                 .target = (src->base.target == PIPE_TEXTURE_CUBE_ARRAY) ?
187                           PIPE_TEXTURE_2D_ARRAY :
188                           src->base.target,
189                 .format = src_format,
190                 .u.tex = {
191                         .first_level = info->src.level,
192                         .last_level = info->src.level,
193                         .first_layer = 0,
194                         .last_layer = (PIPE_TEXTURE_3D ?
195                                        u_minify(src->base.depth0,
196                                                 info->src.level) - 1 :
197                                        src->base.array_size - 1),
198                 },
199                 .swizzle_r = PIPE_SWIZZLE_X,
200                 .swizzle_g = PIPE_SWIZZLE_Y,
201                 .swizzle_b = PIPE_SWIZZLE_Z,
202                 .swizzle_a = PIPE_SWIZZLE_W,
203         };
204         struct pipe_sampler_view *src_view =
205                 ctx->create_sampler_view(ctx, &src->base, &src_tmpl);
206 
207         v3d_blitter_save(v3d, info->render_condition_enable ?
208                          V3D_BLIT_COND : V3D_BLIT);
209         util_blitter_blit_generic(v3d->blitter, dst_surf, &info->dst.box,
210                                   src_view, &info->src.box,
211                                   src->base.width0, src->base.height0,
212                                   PIPE_MASK_R,
213                                   PIPE_TEX_FILTER_NEAREST,
214                                   info->scissor_enable ? &info->scissor : NULL,
215                                   info->alpha_blend, false, 0, NULL);
216 
217         pipe_surface_reference(&dst_surf, NULL);
218         pipe_sampler_view_reference(&src_view, NULL);
219 
220         info->mask &= ~PIPE_MASK_S;
221 }
222 
223 bool
v3d_generate_mipmap(struct pipe_context * pctx,struct pipe_resource * prsc,enum pipe_format format,unsigned int base_level,unsigned int last_level,unsigned int first_layer,unsigned int last_layer)224 v3d_generate_mipmap(struct pipe_context *pctx,
225                     struct pipe_resource *prsc,
226                     enum pipe_format format,
227                     unsigned int base_level,
228                     unsigned int last_level,
229                     unsigned int first_layer,
230                     unsigned int last_layer)
231 {
232         if (format != prsc->format)
233                 return false;
234 
235         /* We could maybe support looping over layers for array textures, but
236          * we definitely don't support 3D.
237          */
238         if (first_layer != last_layer)
239                 return false;
240 
241         struct v3d_context *v3d = v3d_context(pctx);
242         struct v3d_screen *screen = v3d->screen;
243         struct v3d_device_info *devinfo = &screen->devinfo;
244 
245         return v3d_X(devinfo, tfu)(pctx,
246                                    prsc, prsc,
247                                    base_level,
248                                    base_level, last_level,
249                                    first_layer, first_layer,
250                                    true);
251 }
252 
253 static void
v3d_tfu_blit(struct pipe_context * pctx,struct pipe_blit_info * info)254 v3d_tfu_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
255 {
256         int dst_width = u_minify(info->dst.resource->width0, info->dst.level);
257         int dst_height = u_minify(info->dst.resource->height0, info->dst.level);
258 
259         if ((info->mask & PIPE_MASK_RGBA) == 0)
260                 return;
261 
262         if (info->scissor_enable ||
263             info->swizzle_enable ||
264             info->dst.box.x != 0 ||
265             info->dst.box.y != 0 ||
266             info->dst.box.width != dst_width ||
267             info->dst.box.height != dst_height ||
268             info->dst.box.depth != 1 ||
269             info->src.box.x != 0 ||
270             info->src.box.y != 0 ||
271             info->src.box.width != info->dst.box.width ||
272             info->src.box.height != info->dst.box.height ||
273             info->src.box.depth != 1) {
274                 return;
275         }
276 
277         if (info->dst.format != info->src.format)
278                 return;
279 
280         struct v3d_context *v3d = v3d_context(pctx);
281         struct v3d_screen *screen = v3d->screen;
282         struct v3d_device_info *devinfo = &screen->devinfo;
283 
284         if (v3d_X(devinfo, tfu)(pctx, info->dst.resource, info->src.resource,
285                                 info->src.level,
286                                 info->dst.level, info->dst.level,
287                                 info->src.box.z, info->dst.box.z,
288                                 false)) {
289                 info->mask &= ~PIPE_MASK_RGBA;
290         }
291 }
292 
293 static struct pipe_surface *
v3d_get_blit_surface(struct pipe_context * pctx,struct pipe_resource * prsc,enum pipe_format format,unsigned level,int16_t layer)294 v3d_get_blit_surface(struct pipe_context *pctx,
295                      struct pipe_resource *prsc,
296                      enum pipe_format format,
297                      unsigned level,
298                      int16_t layer)
299 {
300         struct pipe_surface tmpl;
301 
302         tmpl.format = format;
303         tmpl.u.tex.level = level;
304         tmpl.u.tex.first_layer = layer;
305         tmpl.u.tex.last_layer = layer;
306 
307         return pctx->create_surface(pctx, prsc, &tmpl);
308 }
309 
/**
 * Returns true when @size has any of the bits below @tile_size set, i.e.
 * when (size & (tile_size - 1)) is non-zero.
 *
 * NOTE(review): this is only an alignment test if @tile_size is a power of
 * two -- confirm against the tile sizes callers pass in.
 */
static bool
is_tile_unaligned(unsigned size, unsigned tile_size)
{
        const unsigned low_bits_mask = tile_size - 1;

        return (size & low_bits_mask) != 0;
}
315 
316 static bool
check_tlb_blit_ok(struct v3d_device_info * devinfo,struct pipe_blit_info * info)317 check_tlb_blit_ok(struct v3d_device_info *devinfo, struct pipe_blit_info *info)
318 {
319         if (!info->mask)
320                 return false;
321 
322         bool is_color_blit = info->mask & PIPE_MASK_RGBA;
323         bool is_depth_blit = info->mask & PIPE_MASK_Z;
324         bool is_stencil_blit = info->mask & PIPE_MASK_S;
325 
326         /* We should receive either a depth/stencil blit, or color blit, but
327          * not both.
328          */
329         assert ((is_color_blit && !is_depth_blit && !is_stencil_blit) ||
330                 (!is_color_blit && (is_depth_blit || is_stencil_blit)));
331 
332         if (info->scissor_enable || info->swizzle_enable)
333                 return false;
334 
335         if (info->src.box.x != info->dst.box.x ||
336             info->src.box.y != info->dst.box.y ||
337             info->src.box.width != info->dst.box.width ||
338             info->src.box.height != info->dst.box.height)
339                 return false;
340 
341         if (is_color_blit &&
342             util_format_is_depth_or_stencil(info->dst.format))
343                 return false;
344 
345         if ((is_depth_blit || is_stencil_blit) &&
346             !util_format_is_depth_or_stencil(info->dst.format))
347                 return false;
348 
349         if (!v3d_rt_format_supported(devinfo, info->src.format))
350                 return false;
351 
352         if (v3d_get_rt_format(devinfo, info->src.format) !=
353             v3d_get_rt_format(devinfo, info->dst.format))
354                 return false;
355 
356         bool is_msaa_resolve = (info->src.resource->nr_samples > 1 &&
357                                 info->dst.resource->nr_samples < 2);
358 
359         if (is_msaa_resolve &&
360             !v3d_format_supports_tlb_msaa_resolve(devinfo, info->src.format))
361                 return false;
362 
363         return true;
364 }
365 
366 /* This checks if we can implement the blit straight from a job that we have
367  * not yet flushed, including MSAA resolves.
368  */
369 static void
v3d_tlb_blit_fast(struct pipe_context * pctx,struct pipe_blit_info * info)370 v3d_tlb_blit_fast(struct pipe_context *pctx, struct pipe_blit_info *info)
371 {
372         struct v3d_context *v3d = v3d_context(pctx);
373         struct v3d_screen *screen = v3d->screen;
374         struct v3d_device_info *devinfo = &screen->devinfo;
375 
376         if (!check_tlb_blit_ok(devinfo, info))
377                 return;
378 
379         /* FIXME: support depth/stencil */
380         if (!(info->mask & PIPE_MASK_RGBA))
381                 return;
382 
383         /* Can't blit from 1x to 4x since the render target configuration
384          * for the job would not be compatible.
385          */
386         if (info->src.resource->nr_samples < info->dst.resource->nr_samples)
387                 return;
388 
389         /* Can't blit form RGBX to RGBA since we can't force A=1 on tile
390          * stores.
391          */
392         if (util_format_has_alpha1(info->src.format) &&
393             !util_format_has_alpha1(info->dst.format))
394             return;
395 
396         /* Find the job that writes the blit source */
397         struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
398                                                            info->src.resource);
399         if (!entry)
400                 return;
401 
402         struct v3d_job *job = entry->data;
403         assert(job);
404 
405         /* The TLB store will involve the same area and tiles as the job
406          * writing to the resource, so only do this if we are blitting the
407          * full resource and the job is writing the full resource.
408          */
409         int dst_width = u_minify(info->dst.resource->width0, info->dst.level);
410         int dst_height = u_minify(info->dst.resource->height0, info->dst.level);
411         if (info->dst.box.x != 0 || info->dst.box.width != dst_width ||
412             info->dst.box.y != 0 || info->dst.box.height != dst_height ||
413             job->draw_min_x != 0 || job->draw_min_y != 0 ||
414             job->draw_max_x != dst_width || job->draw_max_y != dst_height) {
415                 return;
416         }
417 
418         /* Blits are specified for single-layered FBOs, if the job that
419          * produces the blit source is multilayered we would attempt to
420          * blit all layers and write out of bounds on the destination.
421          */
422         if (job->num_layers > 1)
423                 return;
424 
425         /* Find which color attachment in the job is the blit source  */
426         int idx = -1;
427         for (int i = 0; i < job->nr_cbufs; i++) {
428                 if (!job->cbufs[i] ||
429                     job->cbufs[i]->texture != info->src.resource) {
430                         continue;
431                 }
432                 idx = i;
433                 break;
434         }
435 
436         if (idx < 0)
437                 return;
438 
439         struct pipe_surface *dbuf =
440                 v3d_get_blit_surface(pctx, info->dst.resource,
441                                      info->dst.format, info->dst.level,
442                                      info->dst.box.z);
443 
444         /* The job's RT setup must be compatible with the blit buffer. */
445         struct v3d_surface *ssurf = v3d_surface(job->cbufs[idx]);
446         struct v3d_surface *rsurf = v3d_surface(dbuf);
447         if (ssurf->internal_bpp < rsurf->internal_bpp)
448                 return;
449         if (ssurf->internal_type != rsurf->internal_type)
450                 return;
451 
452         MESA_TRACE_FUNC();
453 
454         /* If we had any other jobs writing to the blit dst we should submit
455          * them now before we blit.
456          *
457          * FIXME: We could just drop these jobs completely if they are
458          * rendering a subset of the resource being blit here.
459          */
460         v3d_flush_jobs_writing_resource(v3d, info->dst.resource,
461                                         V3D_FLUSH_DEFAULT, false);
462 
463         /* Program the job to blit from the TLB into the destination buffer */
464         info->mask &= ~PIPE_MASK_RGBA;
465         job->blit_tlb |= PIPE_CLEAR_COLOR0 << idx;
466         job->dbuf = dbuf;
467         v3d_job_add_write_resource(job, info->dst.resource);
468 
469         /* Submit the job immediately, since otherwise we could accumulate
470          * draw calls happening after the blit.
471          */
472         v3d_job_submit(v3d, job);
473 }
474 
475 static void
v3d_tlb_blit(struct pipe_context * pctx,struct pipe_blit_info * info)476 v3d_tlb_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
477 {
478         struct v3d_context *v3d = v3d_context(pctx);
479         struct v3d_screen *screen = v3d->screen;
480         struct v3d_device_info *devinfo = &screen->devinfo;
481 
482         if (!check_tlb_blit_ok(devinfo, info))
483                 return;
484 
485         MESA_TRACE_FUNC();
486 
487         v3d_flush_jobs_writing_resource(v3d, info->src.resource, V3D_FLUSH_DEFAULT, false);
488 
489         struct pipe_surface *dst_surf =
490            v3d_get_blit_surface(pctx, info->dst.resource, info->dst.format, info->dst.level, info->dst.box.z);
491         struct pipe_surface *src_surf =
492            v3d_get_blit_surface(pctx, info->src.resource, info->src.format, info->src.level, info->src.box.z);
493 
494         bool is_color_blit = info->mask & PIPE_MASK_RGBA;
495         bool is_depth_blit = info->mask & PIPE_MASK_Z;
496         bool is_stencil_blit = info->mask & PIPE_MASK_S;
497 
498         struct pipe_surface *surfaces[V3D_MAX_DRAW_BUFFERS] = { 0 };
499         if (is_color_blit)
500                 surfaces[0] = dst_surf;
501 
502         bool msaa = (info->src.resource->nr_samples > 1 ||
503                      info->dst.resource->nr_samples > 1);
504 
505         bool double_buffer = false;
506 
507         uint32_t tile_width, tile_height, max_bpp;
508         v3d_get_tile_buffer_size(devinfo, msaa, double_buffer,
509                                  is_color_blit ? 1 : 0, surfaces, src_surf,
510                                  &tile_width, &tile_height, &max_bpp);
511 
512         int dst_surface_width = u_minify(info->dst.resource->width0,
513                                          info->dst.level);
514         int dst_surface_height = u_minify(info->dst.resource->height0,
515                                          info->dst.level);
516         if (is_tile_unaligned(info->dst.box.x, tile_width) ||
517             is_tile_unaligned(info->dst.box.y, tile_height) ||
518             (is_tile_unaligned(info->dst.box.width, tile_width) &&
519              info->dst.box.x + info->dst.box.width != dst_surface_width) ||
520             (is_tile_unaligned(info->dst.box.height, tile_height) &&
521              info->dst.box.y + info->dst.box.height != dst_surface_height)) {
522                 pipe_surface_reference(&dst_surf, NULL);
523                 pipe_surface_reference(&src_surf, NULL);
524                 return;
525         }
526 
527         struct v3d_job *job = v3d_get_job(v3d,
528                                           is_color_blit ? 1u : 0u,
529                                           surfaces,
530                                           is_color_blit ? NULL : dst_surf,
531                                           src_surf);
532         job->msaa = msaa;
533         job->double_buffer = double_buffer;
534         job->can_use_double_buffer = !job->msaa && V3D_DBG(DOUBLE_BUFFER);
535         job->internal_bpp = max_bpp;
536         job->draw_min_x = info->dst.box.x;
537         job->draw_min_y = info->dst.box.y;
538         job->draw_max_x = info->dst.box.x + info->dst.box.width;
539         job->draw_max_y = info->dst.box.y + info->dst.box.height;
540         job->scissor.disabled = false;
541 
542         /* The simulator complains if we do a TLB load from a source with a
543          * stride that is smaller than the destination's, so we program the
544          * 'frame region' to match the smallest dimensions of the two surfaces.
545          * This should be fine because we only get here if the src and dst boxes
546          * match, so we know the blit involves the same tiles on both surfaces.
547          */
548         job->draw_width = MIN2(dst_surf->width, src_surf->width);
549         job->draw_height = MIN2(dst_surf->height, src_surf->height);
550 
551         job->tile_desc.width = tile_width;
552         job->tile_desc.height = tile_height;
553         job->tile_desc.draw_x = DIV_ROUND_UP(job->draw_width,
554                                              job->tile_desc.width);
555         job->tile_desc.draw_y = DIV_ROUND_UP(job->draw_height,
556                                              job->tile_desc.height);
557 
558         job->needs_flush = true;
559         job->num_layers = info->dst.box.depth;
560 
561         job->store = 0;
562         if (is_color_blit) {
563                 job->store |= PIPE_CLEAR_COLOR0;
564                 info->mask &= ~PIPE_MASK_RGBA;
565         }
566         if (is_depth_blit) {
567                 job->store |= PIPE_CLEAR_DEPTH;
568                 info->mask &= ~PIPE_MASK_Z;
569         }
570         if (is_stencil_blit){
571                 job->store |= PIPE_CLEAR_STENCIL;
572                 info->mask &= ~PIPE_MASK_S;
573         }
574 
575         v3d_X(devinfo, start_binning)(v3d, job);
576 
577         v3d_job_submit(v3d, job);
578 
579         pipe_surface_reference(&dst_surf, NULL);
580         pipe_surface_reference(&src_surf, NULL);
581 }
582 
583 /**
584  * Creates the VS of the custom blit shader to convert YUV plane from
585  * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format.
586  * This vertex shader is mostly a pass-through VS.
587  */
588 static void *
v3d_get_sand8_vs(struct pipe_context * pctx)589 v3d_get_sand8_vs(struct pipe_context *pctx)
590 {
591         struct v3d_context *v3d = v3d_context(pctx);
592         struct pipe_screen *pscreen = pctx->screen;
593 
594         if (v3d->sand8_blit_vs)
595                 return v3d->sand8_blit_vs;
596 
597         const struct nir_shader_compiler_options *options =
598                 pscreen->get_compiler_options(pscreen,
599                                               PIPE_SHADER_IR_NIR,
600                                               PIPE_SHADER_VERTEX);
601 
602         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
603                                                        options,
604                                                        "sand8_blit_vs");
605 
606         const struct glsl_type *vec4 = glsl_vec4_type();
607         nir_variable *pos_in = nir_variable_create(b.shader,
608                                                    nir_var_shader_in,
609                                                    vec4, "pos");
610 
611         nir_variable *pos_out = nir_variable_create(b.shader,
612                                                     nir_var_shader_out,
613                                                     vec4, "gl_Position");
614         pos_out->data.location = VARYING_SLOT_POS;
615         nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);
616 
617         v3d->sand8_blit_vs = pipe_shader_from_nir(pctx, b.shader);
618 
619         return v3d->sand8_blit_vs;
620 }
621 /**
622  * Creates the FS of the custom blit shader to convert YUV plane from
623  * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format.
624  * The result texture is equivalent to a chroma (cpp=2) or luma (cpp=1)
625  * plane for a NV12 format without the SAND modifier.
626  */
627 static void *
v3d_get_sand8_fs(struct pipe_context * pctx,int cpp)628 v3d_get_sand8_fs(struct pipe_context *pctx, int cpp)
629 {
630         struct v3d_context *v3d = v3d_context(pctx);
631         struct pipe_screen *pscreen = pctx->screen;
632         struct pipe_shader_state **cached_shader;
633         const char *name;
634 
635         if (cpp == 1) {
636                 cached_shader = &v3d->sand8_blit_fs_luma;
637                 name = "sand8_blit_fs_luma";
638         } else {
639                 cached_shader = &v3d->sand8_blit_fs_chroma;
640                 name = "sand8_blit_fs_chroma";
641         }
642 
643         if (*cached_shader)
644                 return *cached_shader;
645 
646         const struct nir_shader_compiler_options *options =
647                 pscreen->get_compiler_options(pscreen,
648                                               PIPE_SHADER_IR_NIR,
649                                               PIPE_SHADER_FRAGMENT);
650 
651         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
652                                                        options, "%s", name);
653         b.shader->info.num_ubos = 1;
654         b.shader->num_outputs = 1;
655         b.shader->num_inputs = 1;
656         b.shader->num_uniforms = 1;
657 
658         const struct glsl_type *vec4 = glsl_vec4_type();
659 
660         const struct glsl_type *glsl_uint = glsl_uint_type();
661 
662         nir_variable *color_out =
663                 nir_variable_create(b.shader, nir_var_shader_out,
664                                     vec4, "f_color");
665         color_out->data.location = FRAG_RESULT_COLOR;
666 
667         nir_variable *pos_in =
668                 nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");
669         pos_in->data.location = VARYING_SLOT_POS;
670         nir_def *pos = nir_load_var(&b, pos_in);
671 
672         nir_def *zero = nir_imm_int(&b, 0);
673         nir_def *one = nir_imm_int(&b, 1);
674         nir_def *two = nir_imm_int(&b, 2);
675         nir_def *six = nir_imm_int(&b, 6);
676         nir_def *seven = nir_imm_int(&b, 7);
677         nir_def *eight = nir_imm_int(&b, 8);
678 
679         nir_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
680         nir_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));
681 
682         nir_variable *stride_in =
683                 nir_variable_create(b.shader, nir_var_uniform, glsl_uint,
684                                     "sand8_stride");
685         nir_def *stride =
686                 nir_load_uniform(&b, 1, 32, zero,
687                                  .base = stride_in->data.driver_location,
688                                  .range = 4,
689                                  .dest_type = nir_type_uint32);
690 
691         nir_def *x_offset;
692         nir_def *y_offset;
693 
694         /* UIF tiled format is composed by UIF blocks, Each block has
695          * four 64 byte microtiles. Inside each microtile pixels are stored
696          * in raster format. But microtiles have different dimensions
697          * based in the bits per pixel of the image.
698          *
699          *   8bpp microtile dimensions are 8x8
700          *  16bpp microtile dimensions are 8x4
701          *  32bpp microtile dimensions are 4x4
702          *
703          * As we are reading and writing with 32bpp to optimize
704          * the number of texture operations during the blit, we need
705          * to adjust the offsets were we read and write as data will
706          * be later read using 8bpp (luma) and 16bpp (chroma).
707          *
708          * For chroma 8x4 16bpp raster order is compatible with 4x4
709          * 32bpp. In both layouts each line has 8*2 == 4*4 == 16 bytes.
710          * But luma 8x8 8bpp raster order is not compatible
711          * with 4x4 32bpp. 8bpp has 8 bytes per line, and 32bpp has
712          * 16 bytes per line. So if we read a 8bpp texture that was
713          * written as 32bpp texture. Bytes would be misplaced.
714          *
715          * inter/intra_utile_x_offsets takes care of mapping the offsets
716          * between microtiles to deal with this issue for luma planes.
717          */
718         if (cpp == 1) {
719                 nir_def *intra_utile_x_offset =
720                         nir_ishl(&b, nir_iand_imm(&b, x, 1), two);
721                 nir_def *inter_utile_x_offset =
722                         nir_ishl(&b, nir_iand_imm(&b, x, 60), one);
723                 nir_def *stripe_offset=
724                         nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 6),
725                                              stride),
726                                  seven);
727 
728                 x_offset = nir_iadd(&b, stripe_offset,
729                                         nir_iadd(&b, intra_utile_x_offset,
730                                                      inter_utile_x_offset));
731                 y_offset = nir_iadd(&b,
732                                     nir_ishl(&b, nir_iand_imm(&b, x, 2), six),
733                                     nir_ishl(&b, y, eight));
734         } else  {
735                 nir_def *stripe_offset=
736                         nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 5),
737                                                 stride),
738                                  seven);
739                 x_offset = nir_iadd(&b, stripe_offset,
740                                nir_ishl(&b, nir_iand_imm(&b, x, 31), two));
741                 y_offset = nir_ishl(&b, y, seven);
742         }
743         nir_def *ubo_offset = nir_iadd(&b, x_offset, y_offset);
744         nir_def *load =
745         nir_load_ubo(&b, 1, 32, zero, ubo_offset,
746                     .align_mul = 4,
747                     .align_offset = 0,
748                     .range_base = 0,
749                     .range = ~0);
750 
751         nir_def *output = nir_unpack_unorm_4x8(&b, load);
752 
753         nir_store_var(&b, color_out,
754                       output,
755                       0xF);
756 
757 
758         *cached_shader = pipe_shader_from_nir(pctx, b.shader);
759 
760         return *cached_shader;
761 }
762 
763 /**
764  * Turns NV12 with SAND8 format modifier from raster-order with interleaved
765  * luma and chroma 128-byte-wide-columns to tiled format for luma and chroma.
766  *
767  * This implementation is based on vc4_yuv_blit.
768  */
769 static void
v3d_sand8_blit(struct pipe_context * pctx,struct pipe_blit_info * info)770 v3d_sand8_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
771 {
772         struct v3d_context *v3d = v3d_context(pctx);
773         struct v3d_resource *src = v3d_resource(info->src.resource);
774         ASSERTED struct v3d_resource *dst = v3d_resource(info->dst.resource);
775 
776         if (!src->sand_col128_stride)
777                 return;
778         if (src->tiled)
779                 return;
780         if (src->base.format != PIPE_FORMAT_R8_UNORM &&
781             src->base.format != PIPE_FORMAT_R8G8_UNORM)
782                 return;
783         if (!(info->mask & PIPE_MASK_RGBA))
784                 return;
785 
786         assert(dst->base.format == src->base.format);
787         assert(dst->tiled);
788 
789         assert(info->src.box.x == 0 && info->dst.box.x == 0);
790         assert(info->src.box.y == 0 && info->dst.box.y == 0);
791         assert(info->src.box.width == info->dst.box.width);
792         assert(info->src.box.height == info->dst.box.height);
793 
794         MESA_TRACE_FUNC();
795 
796         v3d_blitter_save(v3d, info->render_condition_enable ?
797                          V3D_BLIT_COND : V3D_BLIT);
798 
799         struct pipe_surface dst_tmpl;
800         util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
801                                          info->dst.level, info->dst.box.z);
802         /* Although the src textures are cpp=1 or cpp=2, the dst texture
803          * uses a cpp=4 dst texture. So, all read/write texture ops will
804          * be done using 32-bit read and writes.
805          */
806         dst_tmpl.format = PIPE_FORMAT_R8G8B8A8_UNORM;
807         struct pipe_surface *dst_surf =
808                 pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
809         if (!dst_surf) {
810                 fprintf(stderr, "Failed to create YUV dst surface\n");
811                 util_blitter_unset_running_flag(v3d->blitter);
812                 return;
813         }
814 
815         uint32_t sand8_stride = src->sand_col128_stride;
816 
817         /* Adjust the dimensions of dst luma/chroma to match src
818          * size now we are using a cpp=4 format. Next dimension take into
819          * account the UIF microtile layouts.
820          */
821         dst_surf->width = align(dst_surf->width, 8) / 2;
822         if (src->cpp == 1)
823                 dst_surf->height /= 2;
824 
825         /* Set the constant buffer. */
826         struct pipe_constant_buffer cb_uniforms = {
827                 .user_buffer = &sand8_stride,
828                 .buffer_size = sizeof(sand8_stride),
829         };
830 
831         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false,
832                                   &cb_uniforms);
833         struct pipe_constant_buffer saved_fs_cb1 = { 0 };
834         pipe_resource_reference(&saved_fs_cb1.buffer,
835                                 v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1].buffer);
836         memcpy(&saved_fs_cb1, &v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1],
837                sizeof(struct pipe_constant_buffer));
838         struct pipe_constant_buffer cb_src = {
839                 .buffer = info->src.resource,
840                 .buffer_offset = src->slices[info->src.level].offset,
841                 .buffer_size = (src->bo->size -
842                                 src->slices[info->src.level].offset),
843         };
844         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false,
845                                   &cb_src);
846         /* Unbind the textures, to make sure we don't try to recurse into the
847          * shadow blit.
848          */
849         pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false, NULL);
850         pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
851 
852         util_blitter_custom_shader(v3d->blitter, dst_surf,
853                                    v3d_get_sand8_vs(pctx),
854                                    v3d_get_sand8_fs(pctx, src->cpp));
855 
856         util_blitter_restore_textures(v3d->blitter);
857         util_blitter_restore_constant_buffer_state(v3d->blitter);
858 
859         /* Restore cb1 (util_blitter doesn't handle this one). */
860         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, true,
861                                   &saved_fs_cb1);
862 
863         pipe_surface_reference(&dst_surf, NULL);
864 
865         info->mask &= ~PIPE_MASK_RGBA;
866 }
867 
868 
869 /**
870  * Creates the VS of the custom blit shader to convert YUV plane from
871  * the P030 format with BROADCOM_SAND_COL128 modifier to UIF tiled P010
872  * format.
873  * This vertex shader is mostly a pass-through VS.
874  */
875 static void *
v3d_get_sand30_vs(struct pipe_context * pctx)876 v3d_get_sand30_vs(struct pipe_context *pctx)
877 {
878         struct v3d_context *v3d = v3d_context(pctx);
879         struct pipe_screen *pscreen = pctx->screen;
880 
881         if (v3d->sand30_blit_vs)
882                 return v3d->sand30_blit_vs;
883 
884         const struct nir_shader_compiler_options *options =
885                 pscreen->get_compiler_options(pscreen,
886                                               PIPE_SHADER_IR_NIR,
887                                               PIPE_SHADER_VERTEX);
888 
889         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
890                                                        options,
891                                                        "sand30_blit_vs");
892 
893         const struct glsl_type *vec4 = glsl_vec4_type();
894         nir_variable *pos_in = nir_variable_create(b.shader,
895                                                    nir_var_shader_in,
896                                                    vec4, "pos");
897 
898         nir_variable *pos_out = nir_variable_create(b.shader,
899                                                     nir_var_shader_out,
900                                                     vec4, "gl_Position");
901         pos_out->data.location = VARYING_SLOT_POS;
902         nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);
903 
904         v3d->sand30_blit_vs = pipe_shader_from_nir(pctx, b.shader);
905 
906         return v3d->sand30_blit_vs;
907 }
908 
909 /**
910  * Given an uvec2 value with rgb10a2 components, it extracts four 10-bit
911  * components, then converts them from unorm10 to unorm16 and returns them
912  * in an uvec4. The start parameter defines where the sequence of 4 values
913  * begins.
914  */
915 static nir_def *
extract_unorm_2xrgb10a2_component_to_4xunorm16(nir_builder * b,nir_def * value,nir_def * start)916 extract_unorm_2xrgb10a2_component_to_4xunorm16(nir_builder *b,
917                                                nir_def *value,
918                                                nir_def *start)
919 {
920         const unsigned mask = BITFIELD_MASK(10);
921 
922         nir_def *shiftw0 = nir_imul_imm(b, start, 10);
923         nir_def *word0 = nir_iand_imm(b, nir_channel(b, value, 0),
924                                           BITFIELD_MASK(30));
925         nir_def *finalword0 = nir_ushr(b, word0, shiftw0);
926         nir_def *word1 = nir_channel(b, value, 1);
927         nir_def *shiftw0tow1 = nir_isub_imm(b, 30, shiftw0);
928         nir_def *word1toword0 =  nir_ishl(b, word1, shiftw0tow1);
929         finalword0 = nir_ior(b, finalword0, word1toword0);
930         nir_def *finalword1 = nir_ushr(b, word1, shiftw0);
931 
932         nir_def *val0 = nir_ishl_imm(b, nir_iand_imm(b, finalword0,
933                                                          mask), 6);
934         nir_def *val1 = nir_ishr_imm(b, nir_iand_imm(b, finalword0,
935                                                          mask << 10), 4);
936         nir_def *val2 = nir_ishr_imm(b, nir_iand_imm(b, finalword0,
937                                                          mask << 20), 14);
938         nir_def *val3 = nir_ishl_imm(b, nir_iand_imm(b, finalword1,
939                                                          mask), 6);
940 
941         return nir_vec4(b, val0, val1, val2, val3);
942 }
943 
944 /**
945  * Creates the FS of the custom blit shader to convert YUV plane from
946  * the P030 format with BROADCOM_SAND_COL128 modifier to UIF tiled P10
947  * format a 16-bit representation per component.
948  *
949  * The result texture is equivalent to a chroma (cpp=4) or luma (cpp=2)
950  * plane for a P010 format without the SAND128 modifier.
951  */
952 static void *
v3d_get_sand30_fs(struct pipe_context * pctx)953 v3d_get_sand30_fs(struct pipe_context *pctx)
954 {
955         struct v3d_context *v3d = v3d_context(pctx);
956         struct pipe_screen *pscreen = pctx->screen;
957 
958         if (v3d->sand30_blit_fs)
959                 return  v3d->sand30_blit_fs;
960 
961         const struct nir_shader_compiler_options *options =
962                 pscreen->get_compiler_options(pscreen,
963                                               PIPE_SHADER_IR_NIR,
964                                               PIPE_SHADER_FRAGMENT);
965 
966         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
967                                                        options,
968                                                        "sand30_blit_fs");
969         b.shader->info.num_ubos = 1;
970         b.shader->num_outputs = 1;
971         b.shader->num_inputs = 1;
972         b.shader->num_uniforms = 1;
973 
974         const struct glsl_type *vec4 = glsl_vec4_type();
975 
976         const struct glsl_type *glsl_uint = glsl_uint_type();
977         const struct glsl_type *glsl_uvec4 = glsl_vector_type(GLSL_TYPE_UINT,
978                                                               4);
979 
980         nir_variable *color_out = nir_variable_create(b.shader,
981                                                       nir_var_shader_out,
982                                                       glsl_uvec4, "f_color");
983         color_out->data.location = FRAG_RESULT_COLOR;
984 
985         nir_variable *pos_in =
986                 nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");
987         pos_in->data.location = VARYING_SLOT_POS;
988         nir_def *pos = nir_load_var(&b, pos_in);
989 
990         nir_def *zero = nir_imm_int(&b, 0);
991         nir_def *three = nir_imm_int(&b, 3);
992 
993         /* With a SAND128 stripe, in 128-bytes with rgb10a2 format we have 96
994          * 10-bit values. So, it represents 96 pixels for Y plane and 48 pixels
995          * for UV frame, but as we are reading 4 10-bit-values at a time we
996          * will have 24 groups (pixels) of 4 10-bit values.
997          */
998         uint32_t pixels_stripe = 24;
999 
1000         nir_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
1001         nir_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));
1002 
1003         /* UIF tiled format is composed by UIF blocks. Each block has four 64
1004          * byte microtiles. Inside each microtile pixels are stored in raster
1005          * format. But microtiles have different dimensions based in the bits
1006          * per pixel of the image.
1007          *
1008          *  16bpp microtile dimensions are 8x4
1009          *  32bpp microtile dimensions are 4x4
1010          *  64bpp microtile dimensions are 4x2
1011          *
1012          * As we are reading and writing with 64bpp to optimize the number of
1013          * texture operations during the blit, we adjust the offsets so when
1014          * the microtile is sampled using the 16bpp (luma) and the 32bpp
1015          * (chroma) the expected pixels are in the correct position, that
1016          * would be different if we were using a 64bpp sampling.
1017          *
1018          * For luma 8x4 16bpp and chroma 4x4 32bpp luma raster order is
1019          * incompatible with 4x2 64bpp. 16bpp has 16 bytes per line, 32bpp has
1020          * also 16byte per line. But 64bpp has 32 bytes per line. So if we
1021          * read a 16bpp or 32bpp texture that was written as 64bpp texture,
1022          * pixels would be misplaced.
1023          *
1024          * inter/intra_utile_x_offsets takes care of mapping the offsets
1025          * between microtiles to deal with this issue for luma and chroma
1026          * planes.
1027          *
1028          * We reduce the luma and chroma planes to the same blit case
1029          * because 16bpp and 32bpp have compatible microtile raster layout.
1030          * So just doubling the width of the chroma plane before calling the
1031          * blit makes them equivalent.
1032          */
1033         nir_variable *stride_in =
1034                 nir_variable_create(b.shader, nir_var_uniform,
1035                                     glsl_uint, "sand30_stride");
1036         nir_def *stride =
1037                 nir_load_uniform(&b, 1, 32, zero,
1038                                  .base = stride_in->data.driver_location,
1039                                  .range = 4,
1040                                  .dest_type = nir_type_uint32);
1041 
1042         nir_def *real_x = nir_ior(&b, nir_iand_imm(&b, x, 1),
1043                                       nir_ishl_imm(&b,nir_ushr_imm(&b, x, 2),
1044                                       1));
1045         nir_def *x_pos_in_stripe = nir_umod_imm(&b, real_x, pixels_stripe);
1046         nir_def *component = nir_umod(&b, real_x, three);
1047         nir_def *intra_utile_x_offset = nir_ishl_imm(&b, component, 2);
1048 
1049         nir_def *inter_utile_x_offset =
1050                 nir_ishl_imm(&b, nir_udiv_imm(&b, x_pos_in_stripe, 3), 4);
1051 
1052         nir_def *stripe_offset=
1053                 nir_ishl_imm(&b,
1054                              nir_imul(&b,
1055                                       nir_udiv_imm(&b, real_x, pixels_stripe),
1056                                       stride),
1057                              7);
1058 
1059         nir_def *x_offset = nir_iadd(&b, stripe_offset,
1060                                          nir_iadd(&b, intra_utile_x_offset,
1061                                                   inter_utile_x_offset));
1062         nir_def *y_offset =
1063                 nir_iadd(&b, nir_ishl_imm(&b, nir_iand_imm(&b, x, 2), 6),
1064                          nir_ishl_imm(&b, y, 8));
1065         nir_def *ubo_offset = nir_iadd(&b, x_offset, y_offset);
1066 
1067         nir_def *load = nir_load_ubo(&b, 2, 32, zero, ubo_offset,
1068                                          .align_mul = 8,
1069                                          .align_offset = 0,
1070                                          .range_base = 0,
1071                                          .range = ~0);
1072         nir_def *output =
1073                 extract_unorm_2xrgb10a2_component_to_4xunorm16(&b, load,
1074                                                                component);
1075         nir_store_var(&b, color_out,
1076                       output,
1077                       0xf);
1078 
1079         v3d->sand30_blit_fs = pipe_shader_from_nir(pctx, b.shader);
1080 
1081         return v3d->sand30_blit_fs;
1082 }
1083 
1084 /**
1085  * Turns P030 with SAND30 format modifier from raster-order with interleaved
1086  * luma and chroma 128-byte-wide-columns to a P010 UIF tiled format for luma
1087  * and chroma.
1088  */
1089 static void
v3d_sand30_blit(struct pipe_context * pctx,struct pipe_blit_info * info)1090 v3d_sand30_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
1091 {
1092         struct v3d_context *v3d = v3d_context(pctx);
1093         struct v3d_resource *src = v3d_resource(info->src.resource);
1094         ASSERTED struct v3d_resource *dst = v3d_resource(info->dst.resource);
1095 
1096         if (!src->sand_col128_stride)
1097                 return;
1098         if (src->tiled)
1099                 return;
1100         if (src->base.format != PIPE_FORMAT_R16_UNORM &&
1101             src->base.format != PIPE_FORMAT_R16G16_UNORM)
1102                 return;
1103         if (!(info->mask & PIPE_MASK_RGBA))
1104                 return;
1105         if (info->swizzle_enable)
1106                 return;
1107         assert(dst->base.format == src->base.format);
1108         assert(dst->tiled);
1109 
1110         assert(info->src.box.x == 0 && info->dst.box.x == 0);
1111         assert(info->src.box.y == 0 && info->dst.box.y == 0);
1112         assert(info->src.box.width == info->dst.box.width);
1113         assert(info->src.box.height == info->dst.box.height);
1114 
1115         MESA_TRACE_FUNC();
1116 
1117         v3d_blitter_save(v3d, info->render_condition_enable ?
1118                          V3D_BLIT_COND : V3D_BLIT);
1119 
1120         struct pipe_surface dst_tmpl;
1121         util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
1122                                          info->dst.level, info->dst.box.z);
1123 
1124         dst_tmpl.format = PIPE_FORMAT_R16G16B16A16_UINT;
1125 
1126         struct pipe_surface *dst_surf =
1127                 pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
1128         if (!dst_surf) {
1129                 fprintf(stderr, "Failed to create YUV dst surface\n");
1130                 util_blitter_unset_running_flag(v3d->blitter);
1131                 return;
1132         }
1133 
1134         uint32_t sand30_stride = src->sand_col128_stride;
1135 
1136         /* Adjust the dimensions of dst luma/chroma to match src
1137          * size now we are using a cpp=8 format. Next dimension take into
1138          * account the UIF microtile layouts.
1139          */
1140         dst_surf->height /= 2;
1141         dst_surf->width = align(dst_surf->width, 8);
1142         if (src->cpp == 2)
1143                 dst_surf->width /= 2;
1144         /* Set the constant buffer. */
1145         struct pipe_constant_buffer cb_uniforms = {
1146                 .user_buffer = &sand30_stride,
1147                 .buffer_size = sizeof(sand30_stride),
1148         };
1149 
1150         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false,
1151                                   &cb_uniforms);
1152 
1153         struct pipe_constant_buffer saved_fs_cb1 = { 0 };
1154         pipe_resource_reference(&saved_fs_cb1.buffer,
1155                                 v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1].buffer);
1156         memcpy(&saved_fs_cb1, &v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1],
1157                sizeof(struct pipe_constant_buffer));
1158         struct pipe_constant_buffer cb_src = {
1159                 .buffer = info->src.resource,
1160                 .buffer_offset = src->slices[info->src.level].offset,
1161                 .buffer_size = (src->bo->size -
1162                                 src->slices[info->src.level].offset),
1163         };
1164         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false,
1165                                   &cb_src);
1166         /* Unbind the textures, to make sure we don't try to recurse into the
1167          * shadow blit.
1168          */
1169         pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false,
1170                                 NULL);
1171         pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
1172 
1173         util_blitter_custom_shader(v3d->blitter, dst_surf,
1174                                    v3d_get_sand30_vs(pctx),
1175                                    v3d_get_sand30_fs(pctx));
1176 
1177         util_blitter_restore_textures(v3d->blitter);
1178         util_blitter_restore_constant_buffer_state(v3d->blitter);
1179 
1180         /* Restore cb1 (util_blitter doesn't handle this one). */
1181         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, true,
1182                                   &saved_fs_cb1);
1183         pipe_surface_reference(&dst_surf, NULL);
1184 
1185         info->mask &= ~PIPE_MASK_RGBA;
1186         return;
1187 }
1188 
1189 /* Optimal hardware path for blitting pixels.
1190  * Scaling, format conversion, up- and downsampling (resolve) are allowed.
1191  */
1192 void
v3d_blit(struct pipe_context * pctx,const struct pipe_blit_info * blit_info)1193 v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
1194 {
1195         struct v3d_context *v3d = v3d_context(pctx);
1196         struct pipe_blit_info info = *blit_info;
1197 
1198         if (info.render_condition_enable && !v3d_render_condition_check(v3d))
1199                 return;
1200 
1201         MESA_TRACE_FUNC();
1202 
1203         v3d_sand30_blit(pctx, &info);
1204 
1205         v3d_sand8_blit(pctx, &info);
1206 
1207         v3d_tlb_blit_fast(pctx, &info);
1208 
1209         v3d_tfu_blit(pctx, &info);
1210 
1211         v3d_tlb_blit(pctx, &info);
1212 
1213         v3d_stencil_blit(pctx, &info);
1214 
1215         v3d_render_blit(pctx, &info);
1216 
1217         /* Flush our blit jobs immediately.  They're unlikely to get reused by
1218          * normal drawing or other blits, and without flushing we can easily
1219          * run into unexpected OOMs when blits are used for a large series of
1220          * texture uploads before using the textures.
1221          */
1222         v3d_flush_jobs_writing_resource(v3d, info.dst.resource,
1223                                         V3D_FLUSH_DEFAULT, false);
1224 }
1225