1 /*
2 * Copyright © 2015-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir/pipe_nir.h"
25 #include "util/format/u_format.h"
26 #include "util/u_surface.h"
27 #include "util/u_blitter.h"
28 #include "compiler/nir/nir_builder.h"
29 #include "v3d_context.h"
30 #include "broadcom/common/v3d_tiling.h"
31 #include "broadcom/common/v3d_tfu.h"
32
33 /**
34 * The param @op_blit is used to tell if we are saving state for blitter_blit
35 * (if true) or blitter_clear (if false). If other blitter functions are used
36 * that require different state we may need something more elaborated than
37 * this.
38 */
39
40 void
v3d_blitter_save(struct v3d_context * v3d,bool op_blit,bool render_cond)41 v3d_blitter_save(struct v3d_context *v3d, bool op_blit, bool render_cond)
42 {
43 util_blitter_save_fragment_constant_buffer_slot(v3d->blitter,
44 v3d->constbuf[PIPE_SHADER_FRAGMENT].cb);
45 util_blitter_save_vertex_buffers(v3d->blitter, v3d->vertexbuf.vb, v3d->vertexbuf.count);
46 util_blitter_save_vertex_elements(v3d->blitter, v3d->vtx);
47 util_blitter_save_vertex_shader(v3d->blitter, v3d->prog.bind_vs);
48 util_blitter_save_geometry_shader(v3d->blitter, v3d->prog.bind_gs);
49 util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
50 v3d->streamout.targets);
51 util_blitter_save_rasterizer(v3d->blitter, v3d->rasterizer);
52 util_blitter_save_viewport(v3d->blitter, &v3d->viewport);
53 util_blitter_save_fragment_shader(v3d->blitter, v3d->prog.bind_fs);
54 util_blitter_save_blend(v3d->blitter, v3d->blend);
55 util_blitter_save_depth_stencil_alpha(v3d->blitter, v3d->zsa);
56 util_blitter_save_stencil_ref(v3d->blitter, &v3d->stencil_ref);
57 util_blitter_save_sample_mask(v3d->blitter, v3d->sample_mask, 0);
58 util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
59 v3d->streamout.targets);
60 util_blitter_save_framebuffer(v3d->blitter, &v3d->framebuffer);
61
62 if (op_blit) {
63 util_blitter_save_scissor(v3d->blitter, &v3d->scissor);
64 util_blitter_save_fragment_sampler_states(v3d->blitter,
65 v3d->tex[PIPE_SHADER_FRAGMENT].num_samplers,
66 (void **)v3d->tex[PIPE_SHADER_FRAGMENT].samplers);
67 util_blitter_save_fragment_sampler_views(v3d->blitter,
68 v3d->tex[PIPE_SHADER_FRAGMENT].num_textures,
69 v3d->tex[PIPE_SHADER_FRAGMENT].textures);
70 }
71
72 if (!render_cond) {
73 util_blitter_save_render_condition(v3d->blitter, v3d->cond_query,
74 v3d->cond_cond, v3d->cond_mode);
75 }
76 }
77
78 static void
v3d_render_blit(struct pipe_context * ctx,struct pipe_blit_info * info)79 v3d_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
80 {
81 struct v3d_context *v3d = v3d_context(ctx);
82 struct v3d_resource *src = v3d_resource(info->src.resource);
83 struct pipe_resource *tiled = NULL;
84
85 if (!info->mask)
86 return;
87
88 if (!src->tiled &&
89 info->src.resource->target != PIPE_TEXTURE_1D &&
90 info->src.resource->target != PIPE_TEXTURE_1D_ARRAY) {
91 struct pipe_box box = {
92 .x = 0,
93 .y = 0,
94 .width = u_minify(info->src.resource->width0,
95 info->src.level),
96 .height = u_minify(info->src.resource->height0,
97 info->src.level),
98 .depth = 1,
99 };
100 struct pipe_resource tmpl = {
101 .target = info->src.resource->target,
102 .format = info->src.resource->format,
103 .width0 = box.width,
104 .height0 = box.height,
105 .depth0 = 1,
106 .array_size = 1,
107 };
108 tiled = ctx->screen->resource_create(ctx->screen, &tmpl);
109 if (!tiled) {
110 fprintf(stderr, "Failed to create tiled blit temp\n");
111 return;
112 }
113 ctx->resource_copy_region(ctx,
114 tiled, 0,
115 0, 0, 0,
116 info->src.resource, info->src.level,
117 &box);
118 info->src.level = 0;
119 info->src.resource = tiled;
120 }
121
122 if (!util_blitter_is_blit_supported(v3d->blitter, info)) {
123 fprintf(stderr, "blit unsupported %s -> %s\n",
124 util_format_short_name(info->src.format),
125 util_format_short_name(info->dst.format));
126 return;
127 }
128
129 v3d_blitter_save(v3d, true, info->render_condition_enable);
130 util_blitter_blit(v3d->blitter, info);
131
132 pipe_resource_reference(&tiled, NULL);
133 info->mask = 0;
134 }
135
136 /* Implement stencil blits by reinterpreting the stencil data as an RGBA8888
137 * or R8 texture.
138 */
139 static void
v3d_stencil_blit(struct pipe_context * ctx,struct pipe_blit_info * info)140 v3d_stencil_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
141 {
142 struct v3d_context *v3d = v3d_context(ctx);
143 struct v3d_resource *src = v3d_resource(info->src.resource);
144 struct v3d_resource *dst = v3d_resource(info->dst.resource);
145 enum pipe_format src_format, dst_format;
146
147 if ((info->mask & PIPE_MASK_S) == 0)
148 return;
149
150 if (src->separate_stencil) {
151 src = src->separate_stencil;
152 src_format = PIPE_FORMAT_R8_UINT;
153 } else {
154 src_format = PIPE_FORMAT_RGBA8888_UINT;
155 }
156
157 if (dst->separate_stencil) {
158 dst = dst->separate_stencil;
159 dst_format = PIPE_FORMAT_R8_UINT;
160 } else {
161 dst_format = PIPE_FORMAT_RGBA8888_UINT;
162 }
163
164 /* Initialize the surface. */
165 struct pipe_surface dst_tmpl = {
166 .u.tex = {
167 .level = info->dst.level,
168 .first_layer = info->dst.box.z,
169 .last_layer = info->dst.box.z,
170 },
171 .format = dst_format,
172 };
173 struct pipe_surface *dst_surf =
174 ctx->create_surface(ctx, &dst->base, &dst_tmpl);
175
176 /* Initialize the sampler view. */
177 struct pipe_sampler_view src_tmpl = {
178 .target = (src->base.target == PIPE_TEXTURE_CUBE_ARRAY) ?
179 PIPE_TEXTURE_2D_ARRAY :
180 src->base.target,
181 .format = src_format,
182 .u.tex = {
183 .first_level = info->src.level,
184 .last_level = info->src.level,
185 .first_layer = 0,
186 .last_layer = (PIPE_TEXTURE_3D ?
187 u_minify(src->base.depth0,
188 info->src.level) - 1 :
189 src->base.array_size - 1),
190 },
191 .swizzle_r = PIPE_SWIZZLE_X,
192 .swizzle_g = PIPE_SWIZZLE_Y,
193 .swizzle_b = PIPE_SWIZZLE_Z,
194 .swizzle_a = PIPE_SWIZZLE_W,
195 };
196 struct pipe_sampler_view *src_view =
197 ctx->create_sampler_view(ctx, &src->base, &src_tmpl);
198
199 v3d_blitter_save(v3d, true, info->render_condition_enable);
200 util_blitter_blit_generic(v3d->blitter, dst_surf, &info->dst.box,
201 src_view, &info->src.box,
202 src->base.width0, src->base.height0,
203 PIPE_MASK_R,
204 PIPE_TEX_FILTER_NEAREST,
205 info->scissor_enable ? &info->scissor : NULL,
206 info->alpha_blend, false, 0);
207
208 pipe_surface_reference(&dst_surf, NULL);
209 pipe_sampler_view_reference(&src_view, NULL);
210
211 info->mask &= ~PIPE_MASK_S;
212 }
213
214 bool
v3d_generate_mipmap(struct pipe_context * pctx,struct pipe_resource * prsc,enum pipe_format format,unsigned int base_level,unsigned int last_level,unsigned int first_layer,unsigned int last_layer)215 v3d_generate_mipmap(struct pipe_context *pctx,
216 struct pipe_resource *prsc,
217 enum pipe_format format,
218 unsigned int base_level,
219 unsigned int last_level,
220 unsigned int first_layer,
221 unsigned int last_layer)
222 {
223 if (format != prsc->format)
224 return false;
225
226 /* We could maybe support looping over layers for array textures, but
227 * we definitely don't support 3D.
228 */
229 if (first_layer != last_layer)
230 return false;
231
232 struct v3d_context *v3d = v3d_context(pctx);
233 struct v3d_screen *screen = v3d->screen;
234 struct v3d_device_info *devinfo = &screen->devinfo;
235
236 return v3d_X(devinfo, tfu)(pctx,
237 prsc, prsc,
238 base_level,
239 base_level, last_level,
240 first_layer, first_layer,
241 true);
242 }
243
244 static void
v3d_tfu_blit(struct pipe_context * pctx,struct pipe_blit_info * info)245 v3d_tfu_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
246 {
247 int dst_width = u_minify(info->dst.resource->width0, info->dst.level);
248 int dst_height = u_minify(info->dst.resource->height0, info->dst.level);
249
250 if ((info->mask & PIPE_MASK_RGBA) == 0)
251 return;
252
253 if (info->scissor_enable ||
254 info->dst.box.x != 0 ||
255 info->dst.box.y != 0 ||
256 info->dst.box.width != dst_width ||
257 info->dst.box.height != dst_height ||
258 info->dst.box.depth != 1 ||
259 info->src.box.x != 0 ||
260 info->src.box.y != 0 ||
261 info->src.box.width != info->dst.box.width ||
262 info->src.box.height != info->dst.box.height ||
263 info->src.box.depth != 1) {
264 return;
265 }
266
267 if (info->dst.format != info->src.format)
268 return;
269
270 struct v3d_context *v3d = v3d_context(pctx);
271 struct v3d_screen *screen = v3d->screen;
272 struct v3d_device_info *devinfo = &screen->devinfo;
273
274 if (v3d_X(devinfo, tfu)(pctx, info->dst.resource, info->src.resource,
275 info->src.level,
276 info->dst.level, info->dst.level,
277 info->src.box.z, info->dst.box.z,
278 false)) {
279 info->mask &= ~PIPE_MASK_RGBA;
280 }
281 }
282
283 static struct pipe_surface *
v3d_get_blit_surface(struct pipe_context * pctx,struct pipe_resource * prsc,enum pipe_format format,unsigned level,int16_t layer)284 v3d_get_blit_surface(struct pipe_context *pctx,
285 struct pipe_resource *prsc,
286 enum pipe_format format,
287 unsigned level,
288 int16_t layer)
289 {
290 struct pipe_surface tmpl;
291
292 tmpl.format = format;
293 tmpl.u.tex.level = level;
294 tmpl.u.tex.first_layer = layer;
295 tmpl.u.tex.last_layer = layer;
296
297 return pctx->create_surface(pctx, prsc, &tmpl);
298 }
299
/* Returns whether @size is not a multiple of @tile_size.  Tile dimensions
 * are powers of two, so masking by (tile_size - 1) extracts the remainder.
 */
static bool
is_tile_unaligned(unsigned size, unsigned tile_size)
{
        return (size & (tile_size - 1)) != 0;
}
305
/* Tries to perform the blit as a direct TLB load/store job: the source is
 * loaded into the tile buffer and stored straight out to the destination.
 * Only works for unscaled, unscissored blits between TLB-compatible
 * formats with tile-aligned destination boxes.  On success the handled
 * channels are cleared from info->mask.
 */
static void
v3d_tlb_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
{
        struct v3d_context *v3d = v3d_context(pctx);
        struct v3d_screen *screen = v3d->screen;
        struct v3d_device_info *devinfo = &screen->devinfo;

        if (!info->mask)
                return;

        bool is_color_blit = info->mask & PIPE_MASK_RGBA;
        bool is_depth_blit = info->mask & PIPE_MASK_Z;
        bool is_stencil_blit = info->mask & PIPE_MASK_S;

        /* We should receive either a depth/stencil blit, or color blit, but
         * not both.
         */
        assert ((is_color_blit && !is_depth_blit && !is_stencil_blit) ||
                (!is_color_blit && (is_depth_blit || is_stencil_blit)));

        if (info->scissor_enable)
                return;

        /* TLB load/store can't scale or translate: boxes must match. */
        if (info->src.box.x != info->dst.box.x ||
            info->src.box.y != info->dst.box.y ||
            info->src.box.width != info->dst.box.width ||
            info->src.box.height != info->dst.box.height)
                return;

        if (is_color_blit &&
            util_format_is_depth_or_stencil(info->dst.format))
                return;

        if ((is_depth_blit || is_stencil_blit) &&
            !util_format_is_depth_or_stencil(info->dst.format))
                return;

        if (!v3d_rt_format_supported(devinfo, info->src.format))
                return;

        /* Src and dst must map to the same internal TLB render-target
         * format for the load/store to be a straight copy.
         */
        if (v3d_get_rt_format(devinfo, info->src.format) !=
            v3d_get_rt_format(devinfo, info->dst.format))
                return;

        bool msaa = (info->src.resource->nr_samples > 1 ||
                     info->dst.resource->nr_samples > 1);
        bool is_msaa_resolve = (info->src.resource->nr_samples > 1 &&
                                info->dst.resource->nr_samples < 2);

        if (is_msaa_resolve &&
            !v3d_format_supports_tlb_msaa_resolve(devinfo, info->src.format))
                return;

        /* Make sure pending rendering to the source has landed before we
         * read it back through the TLB.
         */
        v3d_flush_jobs_writing_resource(v3d, info->src.resource, V3D_FLUSH_DEFAULT, false);

        struct pipe_surface *dst_surf =
           v3d_get_blit_surface(pctx, info->dst.resource, info->dst.format, info->dst.level, info->dst.box.z);
        struct pipe_surface *src_surf =
           v3d_get_blit_surface(pctx, info->src.resource, info->src.format, info->src.level, info->src.box.z);

        struct pipe_surface *surfaces[V3D_MAX_DRAW_BUFFERS] = { 0 };
        if (is_color_blit)
                surfaces[0] = dst_surf;

        bool double_buffer = V3D_DBG(DOUBLE_BUFFER) && !msaa;

        uint32_t tile_width, tile_height, max_bpp;
        v3d_get_tile_buffer_size(devinfo, msaa, double_buffer,
                                 is_color_blit ? 1 : 0, surfaces, src_surf,
                                 &tile_width, &tile_height, &max_bpp);

        /* The destination box must start on a tile boundary and either be
         * tile-sized or run to the edge of the surface, since the TLB
         * stores whole tiles.
         */
        int dst_surface_width = u_minify(info->dst.resource->width0,
                                         info->dst.level);
        int dst_surface_height = u_minify(info->dst.resource->height0,
                                          info->dst.level);
        if (is_tile_unaligned(info->dst.box.x, tile_width) ||
            is_tile_unaligned(info->dst.box.y, tile_height) ||
            (is_tile_unaligned(info->dst.box.width, tile_width) &&
             info->dst.box.x + info->dst.box.width != dst_surface_width) ||
            (is_tile_unaligned(info->dst.box.height, tile_height) &&
             info->dst.box.y + info->dst.box.height != dst_surface_height)) {
                pipe_surface_reference(&dst_surf, NULL);
                pipe_surface_reference(&src_surf, NULL);
                return;
        }

        struct v3d_job *job = v3d_get_job(v3d,
                                          is_color_blit ? 1u : 0u,
                                          surfaces,
                                          is_color_blit ? NULL : dst_surf,
                                          src_surf);
        job->msaa = msaa;
        job->double_buffer = double_buffer;
        job->tile_width = tile_width;
        job->tile_height = tile_height;
        job->internal_bpp = max_bpp;
        job->draw_min_x = info->dst.box.x;
        job->draw_min_y = info->dst.box.y;
        job->draw_max_x = info->dst.box.x + info->dst.box.width;
        job->draw_max_y = info->dst.box.y + info->dst.box.height;
        job->scissor.disabled = false;

        /* The simulator complains if we do a TLB load from a source with a
         * stride that is smaller than the destination's, so we program the
         * 'frame region' to match the smallest dimensions of the two surfaces.
         * This should be fine because we only get here if the src and dst boxes
         * match, so we know the blit involves the same tiles on both surfaces.
         */
        job->draw_width = MIN2(dst_surf->width, src_surf->width);
        job->draw_height = MIN2(dst_surf->height, src_surf->height);
        job->draw_tiles_x = DIV_ROUND_UP(job->draw_width,
                                         job->tile_width);
        job->draw_tiles_y = DIV_ROUND_UP(job->draw_height,
                                         job->tile_height);

        /* Force the load even though nothing is drawn, since the store is
         * the whole point of the job.
         */
        job->needs_flush = true;
        job->num_layers = info->dst.box.depth;

        /* Store (only) the aspects this blit is responsible for, and clear
         * them from the mask so the caller doesn't re-blit them.
         */
        job->store = 0;
        if (is_color_blit) {
                job->store |= PIPE_CLEAR_COLOR0;
                info->mask &= ~PIPE_MASK_RGBA;
        }
        if (is_depth_blit) {
                job->store |= PIPE_CLEAR_DEPTH;
                info->mask &= ~PIPE_MASK_Z;
        }
        if (is_stencil_blit){
                job->store |= PIPE_CLEAR_STENCIL;
                info->mask &= ~PIPE_MASK_S;
        }

        v3d_X(devinfo, start_binning)(v3d, job);

        /* Submit immediately: the job carries no draws, just load/store. */
        v3d_job_submit(v3d, job);

        pipe_surface_reference(&dst_surf, NULL);
        pipe_surface_reference(&src_surf, NULL);
}
445
446 /**
447 * Creates the VS of the custom blit shader to convert YUV plane from
448 * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format.
449 * This vertex shader is mostly a pass-through VS.
450 */
451 static void *
v3d_get_sand8_vs(struct pipe_context * pctx)452 v3d_get_sand8_vs(struct pipe_context *pctx)
453 {
454 struct v3d_context *v3d = v3d_context(pctx);
455 struct pipe_screen *pscreen = pctx->screen;
456
457 if (v3d->sand8_blit_vs)
458 return v3d->sand8_blit_vs;
459
460 const struct nir_shader_compiler_options *options =
461 pscreen->get_compiler_options(pscreen,
462 PIPE_SHADER_IR_NIR,
463 PIPE_SHADER_VERTEX);
464
465 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
466 options,
467 "sand8_blit_vs");
468
469 const struct glsl_type *vec4 = glsl_vec4_type();
470 nir_variable *pos_in = nir_variable_create(b.shader,
471 nir_var_shader_in,
472 vec4, "pos");
473
474 nir_variable *pos_out = nir_variable_create(b.shader,
475 nir_var_shader_out,
476 vec4, "gl_Position");
477 pos_out->data.location = VARYING_SLOT_POS;
478 nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);
479
480 v3d->sand8_blit_vs = pipe_shader_from_nir(pctx, b.shader);
481
482 return v3d->sand8_blit_vs;
483 }
484 /**
485 * Creates the FS of the custom blit shader to convert YUV plane from
486 * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format.
487 * The result texture is equivalent to a chroma (cpp=2) or luma (cpp=1)
488 * plane for a NV12 format without the SAND modifier.
489 */
static void *
v3d_get_sand8_fs(struct pipe_context *pctx, int cpp)
{
        struct v3d_context *v3d = v3d_context(pctx);
        struct pipe_screen *pscreen = pctx->screen;
        struct pipe_shader_state **cached_shader;
        const char *name;

        /* One shader variant per plane kind: luma (cpp=1) needs extra
         * offset remapping, chroma (cpp=2) doesn't (see comment below).
         */
        if (cpp == 1) {
                cached_shader = &v3d->sand8_blit_fs_luma;
                name = "sand8_blit_fs_luma";
        } else {
                cached_shader = &v3d->sand8_blit_fs_chroma;
                name = "sand8_blit_fs_chroma";
        }

        if (*cached_shader)
                return *cached_shader;

        const struct nir_shader_compiler_options *options =
                pscreen->get_compiler_options(pscreen,
                                              PIPE_SHADER_IR_NIR,
                                              PIPE_SHADER_FRAGMENT);

        nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                                       options, "%s", name);
        /* The source data is read through UBO 1 (bound by v3d_sand8_blit),
         * with the stride passed as a single uniform.
         */
        b.shader->info.num_ubos = 1;
        b.shader->num_outputs = 1;
        b.shader->num_inputs = 1;
        b.shader->num_uniforms = 1;

        const struct glsl_type *vec4 = glsl_vec4_type();

        const struct glsl_type *glsl_uint = glsl_uint_type();

        nir_variable *color_out =
                nir_variable_create(b.shader, nir_var_shader_out,
                                    vec4, "f_color");
        color_out->data.location = FRAG_RESULT_COLOR;

        nir_variable *pos_in =
                nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");
        pos_in->data.location = VARYING_SLOT_POS;
        nir_def *pos = nir_load_var(&b, pos_in);

        /* Small integer constants used as shift amounts below. */
        nir_def *zero = nir_imm_int(&b, 0);
        nir_def *one = nir_imm_int(&b, 1);
        nir_def *two = nir_imm_int(&b, 2);
        nir_def *six = nir_imm_int(&b, 6);
        nir_def *seven = nir_imm_int(&b, 7);
        nir_def *eight = nir_imm_int(&b, 8);

        /* Integer pixel coordinates of the fragment being shaded. */
        nir_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
        nir_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));

        /* Stride (in units of 128-byte SAND columns) of the source image,
         * supplied by v3d_sand8_blit through the uniform constant buffer.
         */
        nir_variable *stride_in =
                nir_variable_create(b.shader, nir_var_uniform, glsl_uint,
                                    "sand8_stride");
        nir_def *stride =
                nir_load_uniform(&b, 1, 32, zero,
                                 .base = stride_in->data.driver_location,
                                 .range = 4,
                                 .dest_type = nir_type_uint32);

        nir_def *x_offset;
        nir_def *y_offset;

        /* UIF tiled format is composed by UIF blocks, Each block has
         * four 64 byte microtiles. Inside each microtile pixels are stored
         * in raster format. But microtiles have different dimensions
         * based in the bits per pixel of the image.
         *
         * 8bpp microtile dimensions are 8x8
         * 16bpp microtile dimensions are 8x4
         * 32bpp microtile dimensions are 4x4
         *
         * As we are reading and writing with 32bpp to optimize
         * the number of texture operations during the blit, we need
         * to adjust the offsets were we read and write as data will
         * be later read using 8bpp (luma) and 16bpp (chroma).
         *
         * For chroma 8x4 16bpp raster order is compatible with 4x4
         * 32bpp. In both layouts each line has 8*2 == 4*4 == 16 bytes.
         * But luma 8x8 8bpp raster order is not compatible
         * with 4x4 32bpp. 8bpp has 8 bytes per line, and 32bpp has
         * 16 bytes per line. So if we read a 8bpp texture that was
         * written as 32bpp texture. Bytes would be misplaced.
         *
         * inter/intra_utile_x_offsets takes care of mapping the offsets
         * between microtiles to deal with this issue for luma planes.
         */
        if (cpp == 1) {
                nir_def *intra_utile_x_offset =
                        nir_ishl(&b, nir_iand_imm(&b, x, 1), two);
                nir_def *inter_utile_x_offset =
                        nir_ishl(&b, nir_iand_imm(&b, x, 60), one);
                /* x/64 selects the 128-byte SAND column; each column is
                 * stride*128 bytes apart (<< 7).
                 */
                nir_def *stripe_offset=
                        nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 6),
                                             stride),
                                 seven);

                x_offset = nir_iadd(&b, stripe_offset,
                                    nir_iadd(&b, intra_utile_x_offset,
                                             inter_utile_x_offset));
                y_offset = nir_iadd(&b,
                                    nir_ishl(&b, nir_iand_imm(&b, x, 2), six),
                                    nir_ishl(&b, y, eight));
        } else {
                /* Chroma: raster layouts are compatible, so only the SAND
                 * column (x/32) and byte offset within the line are needed.
                 */
                nir_def *stripe_offset=
                        nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 5),
                                             stride),
                                 seven);
                x_offset = nir_iadd(&b, stripe_offset,
                                    nir_ishl(&b, nir_iand_imm(&b, x, 31), two));
                y_offset = nir_ishl(&b, y, seven);
        }
        /* Fetch one 32-bit word from the raw SAND source at the computed
         * byte offset and emit it as an RGBA8 unorm color.
         */
        nir_def *ubo_offset = nir_iadd(&b, x_offset, y_offset);
        nir_def *load =
        nir_load_ubo(&b, 1, 32, zero, ubo_offset,
                     .align_mul = 4,
                     .align_offset = 0,
                     .range_base = 0,
                     .range = ~0);

        nir_def *output = nir_unpack_unorm_4x8(&b, load);

        nir_store_var(&b, color_out,
                      output,
                      0xF);


        *cached_shader = pipe_shader_from_nir(pctx, b.shader);

        return *cached_shader;
}
625
626 /**
627 * Turns NV12 with SAND8 format modifier from raster-order with interleaved
628 * luma and chroma 128-byte-wide-columns to tiled format for luma and chroma.
629 *
630 * This implementation is based on vc4_yuv_blit.
631 */
static void
v3d_sand8_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
{
        struct v3d_context *v3d = v3d_context(pctx);
        struct v3d_resource *src = v3d_resource(info->src.resource);
        ASSERTED struct v3d_resource *dst = v3d_resource(info->dst.resource);

        /* Only applies to raster NV12 SAND_COL128 sources (R8 luma or
         * R8G8 chroma planes) with color channels left to blit.
         */
        if (!src->sand_col128_stride)
                return;
        if (src->tiled)
                return;
        if (src->base.format != PIPE_FORMAT_R8_UNORM &&
            src->base.format != PIPE_FORMAT_R8G8_UNORM)
                return;
        if (!(info->mask & PIPE_MASK_RGBA))
                return;

        assert(dst->base.format == src->base.format);
        assert(dst->tiled);

        assert(info->src.box.x == 0 && info->dst.box.x == 0);
        assert(info->src.box.y == 0 && info->dst.box.y == 0);
        assert(info->src.box.width == info->dst.box.width);
        assert(info->src.box.height == info->dst.box.height);

        v3d_blitter_save(v3d, true, info->render_condition_enable);

        struct pipe_surface dst_tmpl;
        util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
                                         info->dst.level, info->dst.box.z);
        /* Although the src textures are cpp=1 or cpp=2, the dst texture
         * uses a cpp=4 dst texture. So, all read/write texture ops will
         * be done using 32-bit read and writes.
         */
        dst_tmpl.format = PIPE_FORMAT_R8G8B8A8_UNORM;
        struct pipe_surface *dst_surf =
                pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
        if (!dst_surf) {
                fprintf(stderr, "Failed to create YUV dst surface\n");
                /* v3d_blitter_save already flagged the blitter as running;
                 * clear that before bailing.
                 */
                util_blitter_unset_running_flag(v3d->blitter);
                return;
        }

        uint32_t sand8_stride = src->sand_col128_stride;

        /* Adjust the dimensions of dst luma/chroma to match src
         * size now we are using a cpp=4 format. Next dimension take into
         * account the UIF microtile layouts.
         */
        dst_surf->width = align(dst_surf->width, 8) / 2;
        if (src->cpp == 1)
                dst_surf->height /= 2;

        /* Set the constant buffer. */
        struct pipe_constant_buffer cb_uniforms = {
                .user_buffer = &sand8_stride,
                .buffer_size = sizeof(sand8_stride),
        };

        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false,
                                  &cb_uniforms);
        /* Save the current fragment cb1 so it can be restored at the end;
         * the extra pipe_resource_reference keeps the saved buffer alive
         * (the later take_ownership=true restore consumes it).
         */
        struct pipe_constant_buffer saved_fs_cb1 = { 0 };
        pipe_resource_reference(&saved_fs_cb1.buffer,
                                v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1].buffer);
        memcpy(&saved_fs_cb1, &v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1],
               sizeof(struct pipe_constant_buffer));
        /* Bind the raw SAND source data as cb1 so the FS can address it
         * byte-wise via nir_load_ubo.
         */
        struct pipe_constant_buffer cb_src = {
                .buffer = info->src.resource,
                .buffer_offset = src->slices[info->src.level].offset,
                .buffer_size = (src->bo->size -
                                src->slices[info->src.level].offset),
        };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false,
                                  &cb_src);
        /* Unbind the textures, to make sure we don't try to recurse into the
         * shadow blit.
         */
        pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false, NULL);
        pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);

        util_blitter_custom_shader(v3d->blitter, dst_surf,
                                   v3d_get_sand8_vs(pctx),
                                   v3d_get_sand8_fs(pctx, src->cpp));

        util_blitter_restore_textures(v3d->blitter);
        util_blitter_restore_constant_buffer_state(v3d->blitter);

        /* Restore cb1 (util_blitter doesn't handle this one). */
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, true,
                                  &saved_fs_cb1);

        pipe_surface_reference(&dst_surf, NULL);

        info->mask &= ~PIPE_MASK_RGBA;
}
727
728
729 /**
730 * Creates the VS of the custom blit shader to convert YUV plane from
731 * the P030 format with BROADCOM_SAND_COL128 modifier to UIF tiled P010
732 * format.
733 * This vertex shader is mostly a pass-through VS.
734 */
735 static void *
v3d_get_sand30_vs(struct pipe_context * pctx)736 v3d_get_sand30_vs(struct pipe_context *pctx)
737 {
738 struct v3d_context *v3d = v3d_context(pctx);
739 struct pipe_screen *pscreen = pctx->screen;
740
741 if (v3d->sand30_blit_vs)
742 return v3d->sand30_blit_vs;
743
744 const struct nir_shader_compiler_options *options =
745 pscreen->get_compiler_options(pscreen,
746 PIPE_SHADER_IR_NIR,
747 PIPE_SHADER_VERTEX);
748
749 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
750 options,
751 "sand30_blit_vs");
752
753 const struct glsl_type *vec4 = glsl_vec4_type();
754 nir_variable *pos_in = nir_variable_create(b.shader,
755 nir_var_shader_in,
756 vec4, "pos");
757
758 nir_variable *pos_out = nir_variable_create(b.shader,
759 nir_var_shader_out,
760 vec4, "gl_Position");
761 pos_out->data.location = VARYING_SLOT_POS;
762 nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);
763
764 v3d->sand30_blit_vs = pipe_shader_from_nir(pctx, b.shader);
765
766 return v3d->sand30_blit_vs;
767 }
768
769 /**
770 * Given an uvec2 value with rgb10a2 components, it extracts four 10-bit
771 * components, then converts them from unorm10 to unorm16 and returns them
772 * in an uvec4. The start parameter defines where the sequence of 4 values
773 * begins.
774 */
static nir_def *
extract_unorm_2xrgb10a2_component_to_4xunorm16(nir_builder *b,
                                               nir_def *value,
                                               nir_def *start)
{
        const unsigned mask = BITFIELD_MASK(10);

        /* The 4 consecutive 10-bit values may straddle the two 32-bit
         * words: take the low 30 bits of word 0 (the alpha bits are
         * unused), shift to the start value, and splice in the bits that
         * spill over from word 1.
         */
        nir_def *shiftw0 = nir_imul_imm(b, start, 10);
        nir_def *word0 = nir_iand_imm(b, nir_channel(b, value, 0),
                                      BITFIELD_MASK(30));
        nir_def *finalword0 = nir_ushr(b, word0, shiftw0);
        nir_def *word1 = nir_channel(b, value, 1);
        nir_def *shiftw0tow1 = nir_isub_imm(b, 30, shiftw0);
        nir_def *word1toword0 = nir_ishl(b, word1, shiftw0tow1);
        finalword0 = nir_ior(b, finalword0, word1toword0);
        nir_def *finalword1 = nir_ushr(b, word1, shiftw0);

        /* Extract each 10-bit field and scale it to 16 bits: a value at
         * bit position p needs a net left shift of 6 (16 - 10), hence
         * << 6, (>> 10) << 6 == >> 4, and (>> 20) << 6 == >> 14.
         */
        nir_def *val0 = nir_ishl_imm(b, nir_iand_imm(b, finalword0,
                                                     mask), 6);
        nir_def *val1 = nir_ishr_imm(b, nir_iand_imm(b, finalword0,
                                                     mask << 10), 4);
        nir_def *val2 = nir_ishr_imm(b, nir_iand_imm(b, finalword0,
                                                     mask << 20), 14);
        nir_def *val3 = nir_ishl_imm(b, nir_iand_imm(b, finalword1,
                                                     mask), 6);

        return nir_vec4(b, val0, val1, val2, val3);
}
803
804 /**
805 * Creates the FS of the custom blit shader to convert YUV plane from
806 * the P030 format with BROADCOM_SAND_COL128 modifier to UIF tiled P10
807 * format a 16-bit representation per component.
808 *
809 * The result texture is equivalent to a chroma (cpp=4) or luma (cpp=2)
810 * plane for a P010 format without the SAND128 modifier.
811 */
static void *
v3d_get_sand30_fs(struct pipe_context *pctx)
{
        struct v3d_context *v3d = v3d_context(pctx);
        struct pipe_screen *pscreen = pctx->screen;

        /* Build the shader once and cache it on the context. */
        if (v3d->sand30_blit_fs)
                return v3d->sand30_blit_fs;

        const struct nir_shader_compiler_options *options =
                pscreen->get_compiler_options(pscreen,
                                              PIPE_SHADER_IR_NIR,
                                              PIPE_SHADER_FRAGMENT);

        nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                                       options,
                                                       "sand30_blit_fs");
        /* Source data is addressed through UBO 1; the stride comes in as
         * a single uniform.
         */
        b.shader->info.num_ubos = 1;
        b.shader->num_outputs = 1;
        b.shader->num_inputs = 1;
        b.shader->num_uniforms = 1;

        const struct glsl_type *vec4 = glsl_vec4_type();

        const struct glsl_type *glsl_uint = glsl_uint_type();
        const struct glsl_type *glsl_uvec4 = glsl_vector_type(GLSL_TYPE_UINT,
                                                              4);

        nir_variable *color_out = nir_variable_create(b.shader,
                                                      nir_var_shader_out,
                                                      glsl_uvec4, "f_color");
        color_out->data.location = FRAG_RESULT_COLOR;

        nir_variable *pos_in =
                nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");
        pos_in->data.location = VARYING_SLOT_POS;
        nir_def *pos = nir_load_var(&b, pos_in);

        nir_def *zero = nir_imm_int(&b, 0);
        nir_def *three = nir_imm_int(&b, 3);

        /* With a SAND128 stripe, in 128-bytes with rgb10a2 format we have 96
         * 10-bit values. So, it represents 96 pixels for Y plane and 48 pixels
         * for UV frame, but as we are reading 4 10-bit-values at a time we
         * will have 24 groups (pixels) of 4 10-bit values.
         */
        uint32_t pixels_stripe = 24;

        /* Integer pixel coordinates of the fragment being shaded. */
        nir_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
        nir_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));

        /* UIF tiled format is composed by UIF blocks. Each block has four 64
         * byte microtiles. Inside each microtile pixels are stored in raster
         * format. But microtiles have different dimensions based in the bits
         * per pixel of the image.
         *
         * 16bpp microtile dimensions are 8x4
         * 32bpp microtile dimensions are 4x4
         * 64bpp microtile dimensions are 4x2
         *
         * As we are reading and writing with 64bpp to optimize the number of
         * texture operations during the blit, we adjust the offsets so when
         * the microtile is sampled using the 16bpp (luma) and the 32bpp
         * (chroma) the expected pixels are in the correct position, that
         * would be different if we were using a 64bpp sampling.
         *
         * For luma 8x4 16bpp and chroma 4x4 32bpp luma raster order is
         * incompatible with 4x2 64bpp. 16bpp has 16 bytes per line, 32bpp has
         * also 16byte per line. But 64bpp has 32 bytes per line. So if we
         * read a 16bpp or 32bpp texture that was written as 64bpp texture,
         * pixels would be misplaced.
         *
         * inter/intra_utile_x_offsets takes care of mapping the offsets
         * between microtiles to deal with this issue for luma and chroma
         * planes.
         *
         * We reduce the luma and chroma planes to the same blit case
         * because 16bpp and 32bpp have compatible microtile raster layout.
         * So just doubling the width of the chroma plane before calling the
         * blit makes them equivalent.
         */
        nir_variable *stride_in =
                nir_variable_create(b.shader, nir_var_uniform,
                                    glsl_uint, "sand30_stride");
        nir_def *stride =
                nir_load_uniform(&b, 1, 32, zero,
                                 .base = stride_in->data.driver_location,
                                 .range = 4,
                                 .dest_type = nir_type_uint32);

        /* Remap the output x into the microtile-compatible order described
         * above: keep bit 0, drop bit 1 and shift the rest down.
         */
        nir_def *real_x = nir_ior(&b, nir_iand_imm(&b, x, 1),
                                  nir_ishl_imm(&b,nir_ushr_imm(&b, x, 2),
                                               1));
        /* Position inside the 24-pixel stripe, which rgb10a2 word group it
         * falls into (3 pixels per pair of 32-bit words), and the 10-bit
         * component index within that group.
         */
        nir_def *x_pos_in_stripe = nir_umod_imm(&b, real_x, pixels_stripe);
        nir_def *component = nir_umod(&b, real_x, three);
        nir_def *intra_utile_x_offset = nir_ishl_imm(&b, component, 2);

        nir_def *inter_utile_x_offset =
                nir_ishl_imm(&b, nir_udiv_imm(&b, x_pos_in_stripe, 3), 4);

        /* real_x / 24 selects the 128-byte SAND column; columns are
         * stride*128 bytes apart (<< 7).
         */
        nir_def *stripe_offset=
                nir_ishl_imm(&b,
                             nir_imul(&b,
                                      nir_udiv_imm(&b, real_x, pixels_stripe),
                                      stride),
                             7);

        nir_def *x_offset = nir_iadd(&b, stripe_offset,
                                     nir_iadd(&b, intra_utile_x_offset,
                                              inter_utile_x_offset));
        nir_def *y_offset =
                nir_iadd(&b, nir_ishl_imm(&b, nir_iand_imm(&b, x, 2), 6),
                         nir_ishl_imm(&b, y, 8));
        nir_def *ubo_offset = nir_iadd(&b, x_offset, y_offset);

        /* Fetch two consecutive 32-bit rgb10a2 words and expand the four
         * 10-bit values starting at 'component' to unorm16.
         */
        nir_def *load = nir_load_ubo(&b, 2, 32, zero, ubo_offset,
                                     .align_mul = 8,
                                     .align_offset = 0,
                                     .range_base = 0,
                                     .range = ~0);
        nir_def *output =
                extract_unorm_2xrgb10a2_component_to_4xunorm16(&b, load,
                                                               component);
        nir_store_var(&b, color_out,
                      output,
                      0xf);

        v3d->sand30_blit_fs = pipe_shader_from_nir(pctx, b.shader);

        return v3d->sand30_blit_fs;
}
943
944 /**
945 * Turns P030 with SAND30 format modifier from raster-order with interleaved
946 * luma and chroma 128-byte-wide-columns to a P010 UIF tiled format for luma
947 * and chroma.
948 */
949 static void
v3d_sand30_blit(struct pipe_context * pctx,struct pipe_blit_info * info)950 v3d_sand30_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
951 {
952 struct v3d_context *v3d = v3d_context(pctx);
953 struct v3d_resource *src = v3d_resource(info->src.resource);
954 ASSERTED struct v3d_resource *dst = v3d_resource(info->dst.resource);
955
956 if (!src->sand_col128_stride)
957 return;
958 if (src->tiled)
959 return;
960 if (src->base.format != PIPE_FORMAT_R16_UNORM &&
961 src->base.format != PIPE_FORMAT_R16G16_UNORM)
962 return;
963 if (!(info->mask & PIPE_MASK_RGBA))
964 return;
965
966 assert(dst->base.format == src->base.format);
967 assert(dst->tiled);
968
969 assert(info->src.box.x == 0 && info->dst.box.x == 0);
970 assert(info->src.box.y == 0 && info->dst.box.y == 0);
971 assert(info->src.box.width == info->dst.box.width);
972 assert(info->src.box.height == info->dst.box.height);
973
974 v3d_blitter_save(v3d, true, info->render_condition_enable);
975
976 struct pipe_surface dst_tmpl;
977 util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
978 info->dst.level, info->dst.box.z);
979
980 dst_tmpl.format = PIPE_FORMAT_R16G16B16A16_UINT;
981
982 struct pipe_surface *dst_surf =
983 pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
984 if (!dst_surf) {
985 fprintf(stderr, "Failed to create YUV dst surface\n");
986 util_blitter_unset_running_flag(v3d->blitter);
987 return;
988 }
989
990 uint32_t sand30_stride = src->sand_col128_stride;
991
992 /* Adjust the dimensions of dst luma/chroma to match src
993 * size now we are using a cpp=8 format. Next dimension take into
994 * account the UIF microtile layouts.
995 */
996 dst_surf->height /= 2;
997 dst_surf->width = align(dst_surf->width, 8);
998 if (src->cpp == 2)
999 dst_surf->width /= 2;
1000 /* Set the constant buffer. */
1001 struct pipe_constant_buffer cb_uniforms = {
1002 .user_buffer = &sand30_stride,
1003 .buffer_size = sizeof(sand30_stride),
1004 };
1005
1006 pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false,
1007 &cb_uniforms);
1008
1009 struct pipe_constant_buffer saved_fs_cb1 = { 0 };
1010 pipe_resource_reference(&saved_fs_cb1.buffer,
1011 v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1].buffer);
1012 memcpy(&saved_fs_cb1, &v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1],
1013 sizeof(struct pipe_constant_buffer));
1014 struct pipe_constant_buffer cb_src = {
1015 .buffer = info->src.resource,
1016 .buffer_offset = src->slices[info->src.level].offset,
1017 .buffer_size = (src->bo->size -
1018 src->slices[info->src.level].offset),
1019 };
1020 pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false,
1021 &cb_src);
1022 /* Unbind the textures, to make sure we don't try to recurse into the
1023 * shadow blit.
1024 */
1025 pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false,
1026 NULL);
1027 pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
1028
1029 util_blitter_custom_shader(v3d->blitter, dst_surf,
1030 v3d_get_sand30_vs(pctx),
1031 v3d_get_sand30_fs(pctx));
1032
1033 util_blitter_restore_textures(v3d->blitter);
1034 util_blitter_restore_constant_buffer_state(v3d->blitter);
1035
1036 /* Restore cb1 (util_blitter doesn't handle this one). */
1037 pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, true,
1038 &saved_fs_cb1);
1039 pipe_surface_reference(&dst_surf, NULL);
1040
1041 info->mask &= ~PIPE_MASK_RGBA;
1042 return;
1043 }
1044
1045 /* Optimal hardware path for blitting pixels.
1046 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
1047 */
1048 void
v3d_blit(struct pipe_context * pctx,const struct pipe_blit_info * blit_info)1049 v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
1050 {
1051 struct v3d_context *v3d = v3d_context(pctx);
1052 struct pipe_blit_info info = *blit_info;
1053
1054 if (info.render_condition_enable && !v3d_render_condition_check(v3d))
1055 return;
1056
1057 v3d_sand30_blit(pctx, &info);
1058
1059 v3d_sand8_blit(pctx, &info);
1060
1061 v3d_tfu_blit(pctx, &info);
1062
1063 v3d_tlb_blit(pctx, &info);
1064
1065 v3d_stencil_blit(pctx, &info);
1066
1067 v3d_render_blit(pctx, &info);
1068
1069 /* Flush our blit jobs immediately. They're unlikely to get reused by
1070 * normal drawing or other blits, and without flushing we can easily
1071 * run into unexpected OOMs when blits are used for a large series of
1072 * texture uploads before using the textures.
1073 */
1074 v3d_flush_jobs_writing_resource(v3d, info.dst.resource,
1075 V3D_FLUSH_DEFAULT, false);
1076 }
1077