• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir/pipe_nir.h"
25 #include "util/format/u_format.h"
26 #include "util/u_surface.h"
27 #include "util/u_blitter.h"
28 #include "compiler/nir/nir_builder.h"
29 #include "v3d_context.h"
30 #include "broadcom/common/v3d_tiling.h"
31 #include "broadcom/common/v3d_tfu.h"
32 
33 /**
34  * The param @op_blit is used to tell if we are saving state for blitter_blit
35  * (if true) or blitter_clear (if false). If other blitter functions are used
36  * that require different state we may need something more elaborated than
37  * this.
38  */
39 
40 void
v3d_blitter_save(struct v3d_context * v3d,bool op_blit,bool render_cond)41 v3d_blitter_save(struct v3d_context *v3d, bool op_blit, bool render_cond)
42 {
43         util_blitter_save_fragment_constant_buffer_slot(v3d->blitter,
44                                                         v3d->constbuf[PIPE_SHADER_FRAGMENT].cb);
45         util_blitter_save_vertex_buffers(v3d->blitter, v3d->vertexbuf.vb, v3d->vertexbuf.count);
46         util_blitter_save_vertex_elements(v3d->blitter, v3d->vtx);
47         util_blitter_save_vertex_shader(v3d->blitter, v3d->prog.bind_vs);
48         util_blitter_save_geometry_shader(v3d->blitter, v3d->prog.bind_gs);
49         util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
50                                      v3d->streamout.targets);
51         util_blitter_save_rasterizer(v3d->blitter, v3d->rasterizer);
52         util_blitter_save_viewport(v3d->blitter, &v3d->viewport);
53         util_blitter_save_fragment_shader(v3d->blitter, v3d->prog.bind_fs);
54         util_blitter_save_blend(v3d->blitter, v3d->blend);
55         util_blitter_save_depth_stencil_alpha(v3d->blitter, v3d->zsa);
56         util_blitter_save_stencil_ref(v3d->blitter, &v3d->stencil_ref);
57         util_blitter_save_sample_mask(v3d->blitter, v3d->sample_mask, 0);
58         util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
59                                      v3d->streamout.targets);
60         util_blitter_save_framebuffer(v3d->blitter, &v3d->framebuffer);
61 
62         if (op_blit) {
63                 util_blitter_save_scissor(v3d->blitter, &v3d->scissor);
64                 util_blitter_save_fragment_sampler_states(v3d->blitter,
65                                                           v3d->tex[PIPE_SHADER_FRAGMENT].num_samplers,
66                                                           (void **)v3d->tex[PIPE_SHADER_FRAGMENT].samplers);
67                 util_blitter_save_fragment_sampler_views(v3d->blitter,
68                                                          v3d->tex[PIPE_SHADER_FRAGMENT].num_textures,
69                                                          v3d->tex[PIPE_SHADER_FRAGMENT].textures);
70         }
71 
72         if (!render_cond) {
73                 util_blitter_save_render_condition(v3d->blitter, v3d->cond_query,
74                                                    v3d->cond_cond, v3d->cond_mode);
75         }
76 }
77 
78 static void
v3d_render_blit(struct pipe_context * ctx,struct pipe_blit_info * info)79 v3d_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
80 {
81         struct v3d_context *v3d = v3d_context(ctx);
82         struct v3d_resource *src = v3d_resource(info->src.resource);
83         struct pipe_resource *tiled = NULL;
84 
85         if (!info->mask)
86                 return;
87 
88         if (!src->tiled &&
89             info->src.resource->target != PIPE_TEXTURE_1D &&
90             info->src.resource->target != PIPE_TEXTURE_1D_ARRAY) {
91                 struct pipe_box box = {
92                         .x = 0,
93                         .y = 0,
94                         .width = u_minify(info->src.resource->width0,
95                                            info->src.level),
96                         .height = u_minify(info->src.resource->height0,
97                                            info->src.level),
98                         .depth = 1,
99                 };
100                 struct pipe_resource tmpl = {
101                         .target = info->src.resource->target,
102                         .format = info->src.resource->format,
103                         .width0 = box.width,
104                         .height0 = box.height,
105                         .depth0 = 1,
106                         .array_size = 1,
107                 };
108                 tiled = ctx->screen->resource_create(ctx->screen, &tmpl);
109                 if (!tiled) {
110                         fprintf(stderr, "Failed to create tiled blit temp\n");
111                         return;
112                 }
113                 ctx->resource_copy_region(ctx,
114                                           tiled, 0,
115                                           0, 0, 0,
116                                           info->src.resource, info->src.level,
117                                           &box);
118                 info->src.level = 0;
119                 info->src.resource = tiled;
120         }
121 
122         if (!util_blitter_is_blit_supported(v3d->blitter, info)) {
123                 fprintf(stderr, "blit unsupported %s -> %s\n",
124                     util_format_short_name(info->src.format),
125                     util_format_short_name(info->dst.format));
126                 return;
127         }
128 
129         v3d_blitter_save(v3d, true, info->render_condition_enable);
130         util_blitter_blit(v3d->blitter, info);
131 
132         pipe_resource_reference(&tiled, NULL);
133         info->mask = 0;
134 }
135 
136 /* Implement stencil blits by reinterpreting the stencil data as an RGBA8888
137  * or R8 texture.
138  */
139 static void
v3d_stencil_blit(struct pipe_context * ctx,struct pipe_blit_info * info)140 v3d_stencil_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
141 {
142         struct v3d_context *v3d = v3d_context(ctx);
143         struct v3d_resource *src = v3d_resource(info->src.resource);
144         struct v3d_resource *dst = v3d_resource(info->dst.resource);
145         enum pipe_format src_format, dst_format;
146 
147         if ((info->mask & PIPE_MASK_S) == 0)
148                 return;
149 
150         if (src->separate_stencil) {
151                 src = src->separate_stencil;
152                 src_format = PIPE_FORMAT_R8_UINT;
153         } else {
154                 src_format = PIPE_FORMAT_RGBA8888_UINT;
155         }
156 
157         if (dst->separate_stencil) {
158                 dst = dst->separate_stencil;
159                 dst_format = PIPE_FORMAT_R8_UINT;
160         } else {
161                 dst_format = PIPE_FORMAT_RGBA8888_UINT;
162         }
163 
164         /* Initialize the surface. */
165         struct pipe_surface dst_tmpl = {
166                 .u.tex = {
167                         .level = info->dst.level,
168                         .first_layer = info->dst.box.z,
169                         .last_layer = info->dst.box.z,
170                 },
171                 .format = dst_format,
172         };
173         struct pipe_surface *dst_surf =
174                 ctx->create_surface(ctx, &dst->base, &dst_tmpl);
175 
176         /* Initialize the sampler view. */
177         struct pipe_sampler_view src_tmpl = {
178                 .target = (src->base.target == PIPE_TEXTURE_CUBE_ARRAY) ?
179                           PIPE_TEXTURE_2D_ARRAY :
180                           src->base.target,
181                 .format = src_format,
182                 .u.tex = {
183                         .first_level = info->src.level,
184                         .last_level = info->src.level,
185                         .first_layer = 0,
186                         .last_layer = (PIPE_TEXTURE_3D ?
187                                        u_minify(src->base.depth0,
188                                                 info->src.level) - 1 :
189                                        src->base.array_size - 1),
190                 },
191                 .swizzle_r = PIPE_SWIZZLE_X,
192                 .swizzle_g = PIPE_SWIZZLE_Y,
193                 .swizzle_b = PIPE_SWIZZLE_Z,
194                 .swizzle_a = PIPE_SWIZZLE_W,
195         };
196         struct pipe_sampler_view *src_view =
197                 ctx->create_sampler_view(ctx, &src->base, &src_tmpl);
198 
199         v3d_blitter_save(v3d, true, info->render_condition_enable);
200         util_blitter_blit_generic(v3d->blitter, dst_surf, &info->dst.box,
201                                   src_view, &info->src.box,
202                                   src->base.width0, src->base.height0,
203                                   PIPE_MASK_R,
204                                   PIPE_TEX_FILTER_NEAREST,
205                                   info->scissor_enable ? &info->scissor : NULL,
206                                   info->alpha_blend, false, 0);
207 
208         pipe_surface_reference(&dst_surf, NULL);
209         pipe_sampler_view_reference(&src_view, NULL);
210 
211         info->mask &= ~PIPE_MASK_S;
212 }
213 
214 bool
v3d_generate_mipmap(struct pipe_context * pctx,struct pipe_resource * prsc,enum pipe_format format,unsigned int base_level,unsigned int last_level,unsigned int first_layer,unsigned int last_layer)215 v3d_generate_mipmap(struct pipe_context *pctx,
216                     struct pipe_resource *prsc,
217                     enum pipe_format format,
218                     unsigned int base_level,
219                     unsigned int last_level,
220                     unsigned int first_layer,
221                     unsigned int last_layer)
222 {
223         if (format != prsc->format)
224                 return false;
225 
226         /* We could maybe support looping over layers for array textures, but
227          * we definitely don't support 3D.
228          */
229         if (first_layer != last_layer)
230                 return false;
231 
232         struct v3d_context *v3d = v3d_context(pctx);
233         struct v3d_screen *screen = v3d->screen;
234         struct v3d_device_info *devinfo = &screen->devinfo;
235 
236         return v3d_X(devinfo, tfu)(pctx,
237                                    prsc, prsc,
238                                    base_level,
239                                    base_level, last_level,
240                                    first_layer, first_layer,
241                                    true);
242 }
243 
244 static void
v3d_tfu_blit(struct pipe_context * pctx,struct pipe_blit_info * info)245 v3d_tfu_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
246 {
247         int dst_width = u_minify(info->dst.resource->width0, info->dst.level);
248         int dst_height = u_minify(info->dst.resource->height0, info->dst.level);
249 
250         if ((info->mask & PIPE_MASK_RGBA) == 0)
251                 return;
252 
253         if (info->scissor_enable ||
254             info->dst.box.x != 0 ||
255             info->dst.box.y != 0 ||
256             info->dst.box.width != dst_width ||
257             info->dst.box.height != dst_height ||
258             info->dst.box.depth != 1 ||
259             info->src.box.x != 0 ||
260             info->src.box.y != 0 ||
261             info->src.box.width != info->dst.box.width ||
262             info->src.box.height != info->dst.box.height ||
263             info->src.box.depth != 1) {
264                 return;
265         }
266 
267         if (info->dst.format != info->src.format)
268                 return;
269 
270         struct v3d_context *v3d = v3d_context(pctx);
271         struct v3d_screen *screen = v3d->screen;
272         struct v3d_device_info *devinfo = &screen->devinfo;
273 
274         if (v3d_X(devinfo, tfu)(pctx, info->dst.resource, info->src.resource,
275                                 info->src.level,
276                                 info->dst.level, info->dst.level,
277                                 info->src.box.z, info->dst.box.z,
278                                 false)) {
279                 info->mask &= ~PIPE_MASK_RGBA;
280         }
281 }
282 
283 static struct pipe_surface *
v3d_get_blit_surface(struct pipe_context * pctx,struct pipe_resource * prsc,enum pipe_format format,unsigned level,int16_t layer)284 v3d_get_blit_surface(struct pipe_context *pctx,
285                      struct pipe_resource *prsc,
286                      enum pipe_format format,
287                      unsigned level,
288                      int16_t layer)
289 {
290         struct pipe_surface tmpl;
291 
292         tmpl.format = format;
293         tmpl.u.tex.level = level;
294         tmpl.u.tex.first_layer = layer;
295         tmpl.u.tex.last_layer = layer;
296 
297         return pctx->create_surface(pctx, prsc, &tmpl);
298 }
299 
/**
 * Returns true when @size has any of the bits below @tile_size set, i.e.
 * when it is not a multiple of @tile_size. The check masks with
 * tile_size - 1, so it is only meaningful for power-of-two tile sizes.
 */
static bool
is_tile_unaligned(unsigned size, unsigned tile_size)
{
        const unsigned low_bits = tile_size - 1;

        return (size & low_bits) != 0;
}
305 
306 static void
v3d_tlb_blit(struct pipe_context * pctx,struct pipe_blit_info * info)307 v3d_tlb_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
308 {
309         struct v3d_context *v3d = v3d_context(pctx);
310         struct v3d_screen *screen = v3d->screen;
311         struct v3d_device_info *devinfo = &screen->devinfo;
312 
313         if (!info->mask)
314                 return;
315 
316         bool is_color_blit = info->mask & PIPE_MASK_RGBA;
317         bool is_depth_blit = info->mask & PIPE_MASK_Z;
318         bool is_stencil_blit = info->mask & PIPE_MASK_S;
319 
320         /* We should receive either a depth/stencil blit, or color blit, but
321          * not both.
322          */
323         assert ((is_color_blit && !is_depth_blit && !is_stencil_blit) ||
324                 (!is_color_blit && (is_depth_blit || is_stencil_blit)));
325 
326         if (info->scissor_enable)
327                 return;
328 
329         if (info->src.box.x != info->dst.box.x ||
330             info->src.box.y != info->dst.box.y ||
331             info->src.box.width != info->dst.box.width ||
332             info->src.box.height != info->dst.box.height)
333                 return;
334 
335         if (is_color_blit &&
336             util_format_is_depth_or_stencil(info->dst.format))
337                 return;
338 
339         if ((is_depth_blit || is_stencil_blit) &&
340             !util_format_is_depth_or_stencil(info->dst.format))
341                 return;
342 
343         if (!v3d_rt_format_supported(devinfo, info->src.format))
344                 return;
345 
346         if (v3d_get_rt_format(devinfo, info->src.format) !=
347             v3d_get_rt_format(devinfo, info->dst.format))
348                 return;
349 
350         bool msaa = (info->src.resource->nr_samples > 1 ||
351                      info->dst.resource->nr_samples > 1);
352         bool is_msaa_resolve = (info->src.resource->nr_samples > 1 &&
353                                 info->dst.resource->nr_samples < 2);
354 
355         if (is_msaa_resolve &&
356             !v3d_format_supports_tlb_msaa_resolve(devinfo, info->src.format))
357                 return;
358 
359         v3d_flush_jobs_writing_resource(v3d, info->src.resource, V3D_FLUSH_DEFAULT, false);
360 
361         struct pipe_surface *dst_surf =
362            v3d_get_blit_surface(pctx, info->dst.resource, info->dst.format, info->dst.level, info->dst.box.z);
363         struct pipe_surface *src_surf =
364            v3d_get_blit_surface(pctx, info->src.resource, info->src.format, info->src.level, info->src.box.z);
365 
366         struct pipe_surface *surfaces[V3D_MAX_DRAW_BUFFERS] = { 0 };
367         if (is_color_blit)
368                 surfaces[0] = dst_surf;
369 
370         bool double_buffer = V3D_DBG(DOUBLE_BUFFER) && !msaa;
371 
372         uint32_t tile_width, tile_height, max_bpp;
373         v3d_get_tile_buffer_size(devinfo, msaa, double_buffer,
374                                  is_color_blit ? 1 : 0, surfaces, src_surf,
375                                  &tile_width, &tile_height, &max_bpp);
376 
377         int dst_surface_width = u_minify(info->dst.resource->width0,
378                                          info->dst.level);
379         int dst_surface_height = u_minify(info->dst.resource->height0,
380                                          info->dst.level);
381         if (is_tile_unaligned(info->dst.box.x, tile_width) ||
382             is_tile_unaligned(info->dst.box.y, tile_height) ||
383             (is_tile_unaligned(info->dst.box.width, tile_width) &&
384              info->dst.box.x + info->dst.box.width != dst_surface_width) ||
385             (is_tile_unaligned(info->dst.box.height, tile_height) &&
386              info->dst.box.y + info->dst.box.height != dst_surface_height)) {
387                 pipe_surface_reference(&dst_surf, NULL);
388                 pipe_surface_reference(&src_surf, NULL);
389                 return;
390         }
391 
392         struct v3d_job *job = v3d_get_job(v3d,
393                                           is_color_blit ? 1u : 0u,
394                                           surfaces,
395                                           is_color_blit ? NULL : dst_surf,
396                                           src_surf);
397         job->msaa = msaa;
398         job->double_buffer = double_buffer;
399         job->tile_width = tile_width;
400         job->tile_height = tile_height;
401         job->internal_bpp = max_bpp;
402         job->draw_min_x = info->dst.box.x;
403         job->draw_min_y = info->dst.box.y;
404         job->draw_max_x = info->dst.box.x + info->dst.box.width;
405         job->draw_max_y = info->dst.box.y + info->dst.box.height;
406         job->scissor.disabled = false;
407 
408         /* The simulator complains if we do a TLB load from a source with a
409          * stride that is smaller than the destination's, so we program the
410          * 'frame region' to match the smallest dimensions of the two surfaces.
411          * This should be fine because we only get here if the src and dst boxes
412          * match, so we know the blit involves the same tiles on both surfaces.
413          */
414         job->draw_width = MIN2(dst_surf->width, src_surf->width);
415         job->draw_height = MIN2(dst_surf->height, src_surf->height);
416         job->draw_tiles_x = DIV_ROUND_UP(job->draw_width,
417                                          job->tile_width);
418         job->draw_tiles_y = DIV_ROUND_UP(job->draw_height,
419                                          job->tile_height);
420 
421         job->needs_flush = true;
422         job->num_layers = info->dst.box.depth;
423 
424         job->store = 0;
425         if (is_color_blit) {
426                 job->store |= PIPE_CLEAR_COLOR0;
427                 info->mask &= ~PIPE_MASK_RGBA;
428         }
429         if (is_depth_blit) {
430                 job->store |= PIPE_CLEAR_DEPTH;
431                 info->mask &= ~PIPE_MASK_Z;
432         }
433         if (is_stencil_blit){
434                 job->store |= PIPE_CLEAR_STENCIL;
435                 info->mask &= ~PIPE_MASK_S;
436         }
437 
438         v3d_X(devinfo, start_binning)(v3d, job);
439 
440         v3d_job_submit(v3d, job);
441 
442         pipe_surface_reference(&dst_surf, NULL);
443         pipe_surface_reference(&src_surf, NULL);
444 }
445 
446 /**
447  * Creates the VS of the custom blit shader to convert YUV plane from
448  * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format.
449  * This vertex shader is mostly a pass-through VS.
450  */
451 static void *
v3d_get_sand8_vs(struct pipe_context * pctx)452 v3d_get_sand8_vs(struct pipe_context *pctx)
453 {
454         struct v3d_context *v3d = v3d_context(pctx);
455         struct pipe_screen *pscreen = pctx->screen;
456 
457         if (v3d->sand8_blit_vs)
458                 return v3d->sand8_blit_vs;
459 
460         const struct nir_shader_compiler_options *options =
461                 pscreen->get_compiler_options(pscreen,
462                                               PIPE_SHADER_IR_NIR,
463                                               PIPE_SHADER_VERTEX);
464 
465         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
466                                                        options,
467                                                        "sand8_blit_vs");
468 
469         const struct glsl_type *vec4 = glsl_vec4_type();
470         nir_variable *pos_in = nir_variable_create(b.shader,
471                                                    nir_var_shader_in,
472                                                    vec4, "pos");
473 
474         nir_variable *pos_out = nir_variable_create(b.shader,
475                                                     nir_var_shader_out,
476                                                     vec4, "gl_Position");
477         pos_out->data.location = VARYING_SLOT_POS;
478         nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);
479 
480         v3d->sand8_blit_vs = pipe_shader_from_nir(pctx, b.shader);
481 
482         return v3d->sand8_blit_vs;
483 }
484 /**
485  * Creates the FS of the custom blit shader to convert YUV plane from
486  * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format.
487  * The result texture is equivalent to a chroma (cpp=2) or luma (cpp=1)
488  * plane for a NV12 format without the SAND modifier.
489  */
490 static void *
v3d_get_sand8_fs(struct pipe_context * pctx,int cpp)491 v3d_get_sand8_fs(struct pipe_context *pctx, int cpp)
492 {
493         struct v3d_context *v3d = v3d_context(pctx);
494         struct pipe_screen *pscreen = pctx->screen;
495         struct pipe_shader_state **cached_shader;
496         const char *name;
497 
498         if (cpp == 1) {
499                 cached_shader = &v3d->sand8_blit_fs_luma;
500                 name = "sand8_blit_fs_luma";
501         } else {
502                 cached_shader = &v3d->sand8_blit_fs_chroma;
503                 name = "sand8_blit_fs_chroma";
504         }
505 
506         if (*cached_shader)
507                 return *cached_shader;
508 
509         const struct nir_shader_compiler_options *options =
510                 pscreen->get_compiler_options(pscreen,
511                                               PIPE_SHADER_IR_NIR,
512                                               PIPE_SHADER_FRAGMENT);
513 
514         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
515                                                        options, "%s", name);
516         b.shader->info.num_ubos = 1;
517         b.shader->num_outputs = 1;
518         b.shader->num_inputs = 1;
519         b.shader->num_uniforms = 1;
520 
521         const struct glsl_type *vec4 = glsl_vec4_type();
522 
523         const struct glsl_type *glsl_uint = glsl_uint_type();
524 
525         nir_variable *color_out =
526                 nir_variable_create(b.shader, nir_var_shader_out,
527                                     vec4, "f_color");
528         color_out->data.location = FRAG_RESULT_COLOR;
529 
530         nir_variable *pos_in =
531                 nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");
532         pos_in->data.location = VARYING_SLOT_POS;
533         nir_def *pos = nir_load_var(&b, pos_in);
534 
535         nir_def *zero = nir_imm_int(&b, 0);
536         nir_def *one = nir_imm_int(&b, 1);
537         nir_def *two = nir_imm_int(&b, 2);
538         nir_def *six = nir_imm_int(&b, 6);
539         nir_def *seven = nir_imm_int(&b, 7);
540         nir_def *eight = nir_imm_int(&b, 8);
541 
542         nir_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
543         nir_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));
544 
545         nir_variable *stride_in =
546                 nir_variable_create(b.shader, nir_var_uniform, glsl_uint,
547                                     "sand8_stride");
548         nir_def *stride =
549                 nir_load_uniform(&b, 1, 32, zero,
550                                  .base = stride_in->data.driver_location,
551                                  .range = 4,
552                                  .dest_type = nir_type_uint32);
553 
554         nir_def *x_offset;
555         nir_def *y_offset;
556 
557         /* UIF tiled format is composed by UIF blocks, Each block has
558          * four 64 byte microtiles. Inside each microtile pixels are stored
559          * in raster format. But microtiles have different dimensions
560          * based in the bits per pixel of the image.
561          *
562          *   8bpp microtile dimensions are 8x8
563          *  16bpp microtile dimensions are 8x4
564          *  32bpp microtile dimensions are 4x4
565          *
566          * As we are reading and writing with 32bpp to optimize
567          * the number of texture operations during the blit, we need
568          * to adjust the offsets were we read and write as data will
569          * be later read using 8bpp (luma) and 16bpp (chroma).
570          *
571          * For chroma 8x4 16bpp raster order is compatible with 4x4
572          * 32bpp. In both layouts each line has 8*2 == 4*4 == 16 bytes.
573          * But luma 8x8 8bpp raster order is not compatible
574          * with 4x4 32bpp. 8bpp has 8 bytes per line, and 32bpp has
575          * 16 bytes per line. So if we read a 8bpp texture that was
576          * written as 32bpp texture. Bytes would be misplaced.
577          *
578          * inter/intra_utile_x_offsets takes care of mapping the offsets
579          * between microtiles to deal with this issue for luma planes.
580          */
581         if (cpp == 1) {
582                 nir_def *intra_utile_x_offset =
583                         nir_ishl(&b, nir_iand_imm(&b, x, 1), two);
584                 nir_def *inter_utile_x_offset =
585                         nir_ishl(&b, nir_iand_imm(&b, x, 60), one);
586                 nir_def *stripe_offset=
587                         nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 6),
588                                              stride),
589                                  seven);
590 
591                 x_offset = nir_iadd(&b, stripe_offset,
592                                         nir_iadd(&b, intra_utile_x_offset,
593                                                      inter_utile_x_offset));
594                 y_offset = nir_iadd(&b,
595                                     nir_ishl(&b, nir_iand_imm(&b, x, 2), six),
596                                     nir_ishl(&b, y, eight));
597         } else  {
598                 nir_def *stripe_offset=
599                         nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 5),
600                                                 stride),
601                                  seven);
602                 x_offset = nir_iadd(&b, stripe_offset,
603                                nir_ishl(&b, nir_iand_imm(&b, x, 31), two));
604                 y_offset = nir_ishl(&b, y, seven);
605         }
606         nir_def *ubo_offset = nir_iadd(&b, x_offset, y_offset);
607         nir_def *load =
608         nir_load_ubo(&b, 1, 32, zero, ubo_offset,
609                     .align_mul = 4,
610                     .align_offset = 0,
611                     .range_base = 0,
612                     .range = ~0);
613 
614         nir_def *output = nir_unpack_unorm_4x8(&b, load);
615 
616         nir_store_var(&b, color_out,
617                       output,
618                       0xF);
619 
620 
621         *cached_shader = pipe_shader_from_nir(pctx, b.shader);
622 
623         return *cached_shader;
624 }
625 
626 /**
627  * Turns NV12 with SAND8 format modifier from raster-order with interleaved
628  * luma and chroma 128-byte-wide-columns to tiled format for luma and chroma.
629  *
630  * This implementation is based on vc4_yuv_blit.
631  */
632 static void
v3d_sand8_blit(struct pipe_context * pctx,struct pipe_blit_info * info)633 v3d_sand8_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
634 {
635         struct v3d_context *v3d = v3d_context(pctx);
636         struct v3d_resource *src = v3d_resource(info->src.resource);
637         ASSERTED struct v3d_resource *dst = v3d_resource(info->dst.resource);
638 
639         if (!src->sand_col128_stride)
640                 return;
641         if (src->tiled)
642                 return;
643         if (src->base.format != PIPE_FORMAT_R8_UNORM &&
644             src->base.format != PIPE_FORMAT_R8G8_UNORM)
645                 return;
646         if (!(info->mask & PIPE_MASK_RGBA))
647                 return;
648 
649         assert(dst->base.format == src->base.format);
650         assert(dst->tiled);
651 
652         assert(info->src.box.x == 0 && info->dst.box.x == 0);
653         assert(info->src.box.y == 0 && info->dst.box.y == 0);
654         assert(info->src.box.width == info->dst.box.width);
655         assert(info->src.box.height == info->dst.box.height);
656 
657         v3d_blitter_save(v3d, true, info->render_condition_enable);
658 
659         struct pipe_surface dst_tmpl;
660         util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
661                                          info->dst.level, info->dst.box.z);
662         /* Although the src textures are cpp=1 or cpp=2, the dst texture
663          * uses a cpp=4 dst texture. So, all read/write texture ops will
664          * be done using 32-bit read and writes.
665          */
666         dst_tmpl.format = PIPE_FORMAT_R8G8B8A8_UNORM;
667         struct pipe_surface *dst_surf =
668                 pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
669         if (!dst_surf) {
670                 fprintf(stderr, "Failed to create YUV dst surface\n");
671                 util_blitter_unset_running_flag(v3d->blitter);
672                 return;
673         }
674 
675         uint32_t sand8_stride = src->sand_col128_stride;
676 
677         /* Adjust the dimensions of dst luma/chroma to match src
678          * size now we are using a cpp=4 format. Next dimension take into
679          * account the UIF microtile layouts.
680          */
681         dst_surf->width = align(dst_surf->width, 8) / 2;
682         if (src->cpp == 1)
683                 dst_surf->height /= 2;
684 
685         /* Set the constant buffer. */
686         struct pipe_constant_buffer cb_uniforms = {
687                 .user_buffer = &sand8_stride,
688                 .buffer_size = sizeof(sand8_stride),
689         };
690 
691         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false,
692                                   &cb_uniforms);
693         struct pipe_constant_buffer saved_fs_cb1 = { 0 };
694         pipe_resource_reference(&saved_fs_cb1.buffer,
695                                 v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1].buffer);
696         memcpy(&saved_fs_cb1, &v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1],
697                sizeof(struct pipe_constant_buffer));
698         struct pipe_constant_buffer cb_src = {
699                 .buffer = info->src.resource,
700                 .buffer_offset = src->slices[info->src.level].offset,
701                 .buffer_size = (src->bo->size -
702                                 src->slices[info->src.level].offset),
703         };
704         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false,
705                                   &cb_src);
706         /* Unbind the textures, to make sure we don't try to recurse into the
707          * shadow blit.
708          */
709         pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false, NULL);
710         pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
711 
712         util_blitter_custom_shader(v3d->blitter, dst_surf,
713                                    v3d_get_sand8_vs(pctx),
714                                    v3d_get_sand8_fs(pctx, src->cpp));
715 
716         util_blitter_restore_textures(v3d->blitter);
717         util_blitter_restore_constant_buffer_state(v3d->blitter);
718 
719         /* Restore cb1 (util_blitter doesn't handle this one). */
720         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, true,
721                                   &saved_fs_cb1);
722 
723         pipe_surface_reference(&dst_surf, NULL);
724 
725         info->mask &= ~PIPE_MASK_RGBA;
726 }
727 
728 
729 /**
730  * Creates the VS of the custom blit shader to convert YUV plane from
731  * the P030 format with BROADCOM_SAND_COL128 modifier to UIF tiled P010
732  * format.
733  * This vertex shader is mostly a pass-through VS.
734  */
735 static void *
v3d_get_sand30_vs(struct pipe_context * pctx)736 v3d_get_sand30_vs(struct pipe_context *pctx)
737 {
738         struct v3d_context *v3d = v3d_context(pctx);
739         struct pipe_screen *pscreen = pctx->screen;
740 
741         if (v3d->sand30_blit_vs)
742                 return v3d->sand30_blit_vs;
743 
744         const struct nir_shader_compiler_options *options =
745                 pscreen->get_compiler_options(pscreen,
746                                               PIPE_SHADER_IR_NIR,
747                                               PIPE_SHADER_VERTEX);
748 
749         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
750                                                        options,
751                                                        "sand30_blit_vs");
752 
753         const struct glsl_type *vec4 = glsl_vec4_type();
754         nir_variable *pos_in = nir_variable_create(b.shader,
755                                                    nir_var_shader_in,
756                                                    vec4, "pos");
757 
758         nir_variable *pos_out = nir_variable_create(b.shader,
759                                                     nir_var_shader_out,
760                                                     vec4, "gl_Position");
761         pos_out->data.location = VARYING_SLOT_POS;
762         nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);
763 
764         v3d->sand30_blit_vs = pipe_shader_from_nir(pctx, b.shader);
765 
766         return v3d->sand30_blit_vs;
767 }
768 
769 /**
770  * Given an uvec2 value with rgb10a2 components, it extracts four 10-bit
771  * components, then converts them from unorm10 to unorm16 and returns them
772  * in an uvec4. The start parameter defines where the sequence of 4 values
773  * begins.
774  */
775 static nir_def *
extract_unorm_2xrgb10a2_component_to_4xunorm16(nir_builder * b,nir_def * value,nir_def * start)776 extract_unorm_2xrgb10a2_component_to_4xunorm16(nir_builder *b,
777                                                nir_def *value,
778                                                nir_def *start)
779 {
780         const unsigned mask = BITFIELD_MASK(10);
781 
782         nir_def *shiftw0 = nir_imul_imm(b, start, 10);
783         nir_def *word0 = nir_iand_imm(b, nir_channel(b, value, 0),
784                                           BITFIELD_MASK(30));
785         nir_def *finalword0 = nir_ushr(b, word0, shiftw0);
786         nir_def *word1 = nir_channel(b, value, 1);
787         nir_def *shiftw0tow1 = nir_isub_imm(b, 30, shiftw0);
788         nir_def *word1toword0 =  nir_ishl(b, word1, shiftw0tow1);
789         finalword0 = nir_ior(b, finalword0, word1toword0);
790         nir_def *finalword1 = nir_ushr(b, word1, shiftw0);
791 
792         nir_def *val0 = nir_ishl_imm(b, nir_iand_imm(b, finalword0,
793                                                          mask), 6);
794         nir_def *val1 = nir_ishr_imm(b, nir_iand_imm(b, finalword0,
795                                                          mask << 10), 4);
796         nir_def *val2 = nir_ishr_imm(b, nir_iand_imm(b, finalword0,
797                                                          mask << 20), 14);
798         nir_def *val3 = nir_ishl_imm(b, nir_iand_imm(b, finalword1,
799                                                          mask), 6);
800 
801         return nir_vec4(b, val0, val1, val2, val3);
802 }
803 
804 /**
805  * Creates the FS of the custom blit shader to convert YUV plane from
806  * the P030 format with BROADCOM_SAND_COL128 modifier to UIF tiled P10
807  * format a 16-bit representation per component.
808  *
809  * The result texture is equivalent to a chroma (cpp=4) or luma (cpp=2)
810  * plane for a P010 format without the SAND128 modifier.
811  */
812 static void *
v3d_get_sand30_fs(struct pipe_context * pctx)813 v3d_get_sand30_fs(struct pipe_context *pctx)
814 {
815         struct v3d_context *v3d = v3d_context(pctx);
816         struct pipe_screen *pscreen = pctx->screen;
817 
818         if (v3d->sand30_blit_fs)
819                 return  v3d->sand30_blit_fs;
820 
821         const struct nir_shader_compiler_options *options =
822                 pscreen->get_compiler_options(pscreen,
823                                               PIPE_SHADER_IR_NIR,
824                                               PIPE_SHADER_FRAGMENT);
825 
826         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
827                                                        options,
828                                                        "sand30_blit_fs");
829         b.shader->info.num_ubos = 1;
830         b.shader->num_outputs = 1;
831         b.shader->num_inputs = 1;
832         b.shader->num_uniforms = 1;
833 
834         const struct glsl_type *vec4 = glsl_vec4_type();
835 
836         const struct glsl_type *glsl_uint = glsl_uint_type();
837         const struct glsl_type *glsl_uvec4 = glsl_vector_type(GLSL_TYPE_UINT,
838                                                               4);
839 
840         nir_variable *color_out = nir_variable_create(b.shader,
841                                                       nir_var_shader_out,
842                                                       glsl_uvec4, "f_color");
843         color_out->data.location = FRAG_RESULT_COLOR;
844 
845         nir_variable *pos_in =
846                 nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");
847         pos_in->data.location = VARYING_SLOT_POS;
848         nir_def *pos = nir_load_var(&b, pos_in);
849 
850         nir_def *zero = nir_imm_int(&b, 0);
851         nir_def *three = nir_imm_int(&b, 3);
852 
853         /* With a SAND128 stripe, in 128-bytes with rgb10a2 format we have 96
854          * 10-bit values. So, it represents 96 pixels for Y plane and 48 pixels
855          * for UV frame, but as we are reading 4 10-bit-values at a time we
856          * will have 24 groups (pixels) of 4 10-bit values.
857          */
858         uint32_t pixels_stripe = 24;
859 
860         nir_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
861         nir_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));
862 
863         /* UIF tiled format is composed by UIF blocks. Each block has four 64
864          * byte microtiles. Inside each microtile pixels are stored in raster
865          * format. But microtiles have different dimensions based in the bits
866          * per pixel of the image.
867          *
868          *  16bpp microtile dimensions are 8x4
869          *  32bpp microtile dimensions are 4x4
870          *  64bpp microtile dimensions are 4x2
871          *
872          * As we are reading and writing with 64bpp to optimize the number of
873          * texture operations during the blit, we adjust the offsets so when
874          * the microtile is sampled using the 16bpp (luma) and the 32bpp
875          * (chroma) the expected pixels are in the correct position, that
876          * would be different if we were using a 64bpp sampling.
877          *
878          * For luma 8x4 16bpp and chroma 4x4 32bpp luma raster order is
879          * incompatible with 4x2 64bpp. 16bpp has 16 bytes per line, 32bpp has
880          * also 16byte per line. But 64bpp has 32 bytes per line. So if we
881          * read a 16bpp or 32bpp texture that was written as 64bpp texture,
882          * pixels would be misplaced.
883          *
884          * inter/intra_utile_x_offsets takes care of mapping the offsets
885          * between microtiles to deal with this issue for luma and chroma
886          * planes.
887          *
888          * We reduce the luma and chroma planes to the same blit case
889          * because 16bpp and 32bpp have compatible microtile raster layout.
890          * So just doubling the width of the chroma plane before calling the
891          * blit makes them equivalent.
892          */
893         nir_variable *stride_in =
894                 nir_variable_create(b.shader, nir_var_uniform,
895                                     glsl_uint, "sand30_stride");
896         nir_def *stride =
897                 nir_load_uniform(&b, 1, 32, zero,
898                                  .base = stride_in->data.driver_location,
899                                  .range = 4,
900                                  .dest_type = nir_type_uint32);
901 
902         nir_def *real_x = nir_ior(&b, nir_iand_imm(&b, x, 1),
903                                       nir_ishl_imm(&b,nir_ushr_imm(&b, x, 2),
904                                       1));
905         nir_def *x_pos_in_stripe = nir_umod_imm(&b, real_x, pixels_stripe);
906         nir_def *component = nir_umod(&b, real_x, three);
907         nir_def *intra_utile_x_offset = nir_ishl_imm(&b, component, 2);
908 
909         nir_def *inter_utile_x_offset =
910                 nir_ishl_imm(&b, nir_udiv_imm(&b, x_pos_in_stripe, 3), 4);
911 
912         nir_def *stripe_offset=
913                 nir_ishl_imm(&b,
914                              nir_imul(&b,
915                                       nir_udiv_imm(&b, real_x, pixels_stripe),
916                                       stride),
917                              7);
918 
919         nir_def *x_offset = nir_iadd(&b, stripe_offset,
920                                          nir_iadd(&b, intra_utile_x_offset,
921                                                   inter_utile_x_offset));
922         nir_def *y_offset =
923                 nir_iadd(&b, nir_ishl_imm(&b, nir_iand_imm(&b, x, 2), 6),
924                          nir_ishl_imm(&b, y, 8));
925         nir_def *ubo_offset = nir_iadd(&b, x_offset, y_offset);
926 
927         nir_def *load = nir_load_ubo(&b, 2, 32, zero, ubo_offset,
928                                          .align_mul = 8,
929                                          .align_offset = 0,
930                                          .range_base = 0,
931                                          .range = ~0);
932         nir_def *output =
933                 extract_unorm_2xrgb10a2_component_to_4xunorm16(&b, load,
934                                                                component);
935         nir_store_var(&b, color_out,
936                       output,
937                       0xf);
938 
939         v3d->sand30_blit_fs = pipe_shader_from_nir(pctx, b.shader);
940 
941         return v3d->sand30_blit_fs;
942 }
943 
944 /**
945  * Turns P030 with SAND30 format modifier from raster-order with interleaved
946  * luma and chroma 128-byte-wide-columns to a P010 UIF tiled format for luma
947  * and chroma.
948  */
949 static void
v3d_sand30_blit(struct pipe_context * pctx,struct pipe_blit_info * info)950 v3d_sand30_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
951 {
952         struct v3d_context *v3d = v3d_context(pctx);
953         struct v3d_resource *src = v3d_resource(info->src.resource);
954         ASSERTED struct v3d_resource *dst = v3d_resource(info->dst.resource);
955 
956         if (!src->sand_col128_stride)
957                 return;
958         if (src->tiled)
959                 return;
960         if (src->base.format != PIPE_FORMAT_R16_UNORM &&
961             src->base.format != PIPE_FORMAT_R16G16_UNORM)
962                 return;
963         if (!(info->mask & PIPE_MASK_RGBA))
964                 return;
965 
966         assert(dst->base.format == src->base.format);
967         assert(dst->tiled);
968 
969         assert(info->src.box.x == 0 && info->dst.box.x == 0);
970         assert(info->src.box.y == 0 && info->dst.box.y == 0);
971         assert(info->src.box.width == info->dst.box.width);
972         assert(info->src.box.height == info->dst.box.height);
973 
974         v3d_blitter_save(v3d, true, info->render_condition_enable);
975 
976         struct pipe_surface dst_tmpl;
977         util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
978                                          info->dst.level, info->dst.box.z);
979 
980         dst_tmpl.format = PIPE_FORMAT_R16G16B16A16_UINT;
981 
982         struct pipe_surface *dst_surf =
983                 pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
984         if (!dst_surf) {
985                 fprintf(stderr, "Failed to create YUV dst surface\n");
986                 util_blitter_unset_running_flag(v3d->blitter);
987                 return;
988         }
989 
990         uint32_t sand30_stride = src->sand_col128_stride;
991 
992         /* Adjust the dimensions of dst luma/chroma to match src
993          * size now we are using a cpp=8 format. Next dimension take into
994          * account the UIF microtile layouts.
995          */
996         dst_surf->height /= 2;
997         dst_surf->width = align(dst_surf->width, 8);
998         if (src->cpp == 2)
999                 dst_surf->width /= 2;
1000         /* Set the constant buffer. */
1001         struct pipe_constant_buffer cb_uniforms = {
1002                 .user_buffer = &sand30_stride,
1003                 .buffer_size = sizeof(sand30_stride),
1004         };
1005 
1006         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false,
1007                                   &cb_uniforms);
1008 
1009         struct pipe_constant_buffer saved_fs_cb1 = { 0 };
1010         pipe_resource_reference(&saved_fs_cb1.buffer,
1011                                 v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1].buffer);
1012         memcpy(&saved_fs_cb1, &v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1],
1013                sizeof(struct pipe_constant_buffer));
1014         struct pipe_constant_buffer cb_src = {
1015                 .buffer = info->src.resource,
1016                 .buffer_offset = src->slices[info->src.level].offset,
1017                 .buffer_size = (src->bo->size -
1018                                 src->slices[info->src.level].offset),
1019         };
1020         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false,
1021                                   &cb_src);
1022         /* Unbind the textures, to make sure we don't try to recurse into the
1023          * shadow blit.
1024          */
1025         pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false,
1026                                 NULL);
1027         pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
1028 
1029         util_blitter_custom_shader(v3d->blitter, dst_surf,
1030                                    v3d_get_sand30_vs(pctx),
1031                                    v3d_get_sand30_fs(pctx));
1032 
1033         util_blitter_restore_textures(v3d->blitter);
1034         util_blitter_restore_constant_buffer_state(v3d->blitter);
1035 
1036         /* Restore cb1 (util_blitter doesn't handle this one). */
1037         pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, true,
1038                                   &saved_fs_cb1);
1039         pipe_surface_reference(&dst_surf, NULL);
1040 
1041         info->mask &= ~PIPE_MASK_RGBA;
1042         return;
1043 }
1044 
1045 /* Optimal hardware path for blitting pixels.
1046  * Scaling, format conversion, up- and downsampling (resolve) are allowed.
1047  */
1048 void
v3d_blit(struct pipe_context * pctx,const struct pipe_blit_info * blit_info)1049 v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
1050 {
1051         struct v3d_context *v3d = v3d_context(pctx);
1052         struct pipe_blit_info info = *blit_info;
1053 
1054         if (info.render_condition_enable && !v3d_render_condition_check(v3d))
1055                 return;
1056 
1057         v3d_sand30_blit(pctx, &info);
1058 
1059         v3d_sand8_blit(pctx, &info);
1060 
1061         v3d_tfu_blit(pctx, &info);
1062 
1063         v3d_tlb_blit(pctx, &info);
1064 
1065         v3d_stencil_blit(pctx, &info);
1066 
1067         v3d_render_blit(pctx, &info);
1068 
1069         /* Flush our blit jobs immediately.  They're unlikely to get reused by
1070          * normal drawing or other blits, and without flushing we can easily
1071          * run into unexpected OOMs when blits are used for a large series of
1072          * texture uploads before using the textures.
1073          */
1074         v3d_flush_jobs_writing_resource(v3d, info.dst.resource,
1075                                         V3D_FLUSH_DEFAULT, false);
1076 }
1077