1 /*
2 * Copyright © 2015-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir/pipe_nir.h"
25 #include "util/format/u_format.h"
26 #include "util/perf/cpu_trace.h"
27 #include "util/u_surface.h"
28 #include "util/u_blitter.h"
29 #include "compiler/nir/nir_builder.h"
30 #include "v3d_context.h"
31 #include "broadcom/common/v3d_tiling.h"
32 #include "broadcom/common/v3d_tfu.h"
33
34 /**
35 * The param @op_blit is used to tell if we are saving state for blitter_blit
36 * (if true) or blitter_clear (if false). If other blitter functions are used
37 * that require different state we may need something more elaborated than
38 * this.
39 */
40
41 void
v3d_blitter_save(struct v3d_context * v3d,enum v3d_blitter_op op)42 v3d_blitter_save(struct v3d_context *v3d, enum v3d_blitter_op op)
43 {
44 util_blitter_save_fragment_constant_buffer_slot(v3d->blitter,
45 v3d->constbuf[PIPE_SHADER_FRAGMENT].cb);
46 util_blitter_save_vertex_buffers(v3d->blitter, v3d->vertexbuf.vb, v3d->vertexbuf.count);
47 util_blitter_save_vertex_elements(v3d->blitter, v3d->vtx);
48 util_blitter_save_vertex_shader(v3d->blitter, v3d->prog.bind_vs);
49 util_blitter_save_geometry_shader(v3d->blitter, v3d->prog.bind_gs);
50 util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
51 v3d->streamout.targets, MESA_PRIM_UNKNOWN);
52 util_blitter_save_rasterizer(v3d->blitter, v3d->rasterizer);
53 util_blitter_save_viewport(v3d->blitter, &v3d->viewport);
54 util_blitter_save_fragment_shader(v3d->blitter, v3d->prog.bind_fs);
55 util_blitter_save_blend(v3d->blitter, v3d->blend);
56 util_blitter_save_depth_stencil_alpha(v3d->blitter, v3d->zsa);
57 util_blitter_save_stencil_ref(v3d->blitter, &v3d->stencil_ref);
58 util_blitter_save_sample_mask(v3d->blitter, v3d->sample_mask, 0);
59 util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
60 v3d->streamout.targets, MESA_PRIM_UNKNOWN);
61
62 if (op & V3D_SAVE_FRAMEBUFFER)
63 util_blitter_save_framebuffer(v3d->blitter, &v3d->framebuffer);
64
65 if (op & V3D_SAVE_TEXTURES) {
66 util_blitter_save_scissor(v3d->blitter, &v3d->scissor);
67 util_blitter_save_fragment_sampler_states(v3d->blitter,
68 v3d->tex[PIPE_SHADER_FRAGMENT].num_samplers,
69 (void **)v3d->tex[PIPE_SHADER_FRAGMENT].samplers);
70 util_blitter_save_fragment_sampler_views(v3d->blitter,
71 v3d->tex[PIPE_SHADER_FRAGMENT].num_textures,
72 v3d->tex[PIPE_SHADER_FRAGMENT].textures);
73 }
74
75 if (!(op & V3D_DISABLE_RENDER_COND)) {
76 util_blitter_save_render_condition(v3d->blitter, v3d->cond_query,
77 v3d->cond_cond, v3d->cond_mode);
78 }
79 }
80
81 static void
v3d_render_blit(struct pipe_context * ctx,struct pipe_blit_info * info)82 v3d_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
83 {
84 struct v3d_context *v3d = v3d_context(ctx);
85 struct v3d_resource *src = v3d_resource(info->src.resource);
86 struct pipe_resource *tiled = NULL;
87
88 if (!info->mask)
89 return;
90
91 if (!src->tiled &&
92 info->src.resource->target != PIPE_TEXTURE_1D &&
93 info->src.resource->target != PIPE_TEXTURE_1D_ARRAY) {
94 struct pipe_box box = {
95 .x = 0,
96 .y = 0,
97 .width = u_minify(info->src.resource->width0,
98 info->src.level),
99 .height = u_minify(info->src.resource->height0,
100 info->src.level),
101 .depth = 1,
102 };
103 struct pipe_resource tmpl = {
104 .target = info->src.resource->target,
105 .format = info->src.resource->format,
106 .width0 = box.width,
107 .height0 = box.height,
108 .depth0 = 1,
109 .array_size = 1,
110 };
111 tiled = ctx->screen->resource_create(ctx->screen, &tmpl);
112 if (!tiled) {
113 fprintf(stderr, "Failed to create tiled blit temp\n");
114 return;
115 }
116 ctx->resource_copy_region(ctx,
117 tiled, 0,
118 0, 0, 0,
119 info->src.resource, info->src.level,
120 &box);
121 info->src.level = 0;
122 info->src.resource = tiled;
123 }
124
125 if (!util_blitter_is_blit_supported(v3d->blitter, info)) {
126 fprintf(stderr, "blit unsupported %s -> %s\n",
127 util_format_short_name(info->src.format),
128 util_format_short_name(info->dst.format));
129 return;
130 }
131
132 MESA_TRACE_FUNC();
133
134 v3d_blitter_save(v3d, info->render_condition_enable ?
135 V3D_BLIT_COND : V3D_BLIT);
136 util_blitter_blit(v3d->blitter, info, NULL);
137
138 pipe_resource_reference(&tiled, NULL);
139 info->mask = 0;
140 }
141
142 /* Implement stencil blits by reinterpreting the stencil data as an RGBA8888
143 * or R8 texture.
144 */
145 static void
v3d_stencil_blit(struct pipe_context * ctx,struct pipe_blit_info * info)146 v3d_stencil_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
147 {
148 struct v3d_context *v3d = v3d_context(ctx);
149 struct v3d_resource *src = v3d_resource(info->src.resource);
150 struct v3d_resource *dst = v3d_resource(info->dst.resource);
151 enum pipe_format src_format, dst_format;
152
153 if ((info->mask & PIPE_MASK_S) == 0)
154 return;
155
156 MESA_TRACE_FUNC();
157
158 if (src->separate_stencil) {
159 src = src->separate_stencil;
160 src_format = PIPE_FORMAT_R8_UINT;
161 } else {
162 src_format = PIPE_FORMAT_RGBA8888_UINT;
163 }
164
165 if (dst->separate_stencil) {
166 dst = dst->separate_stencil;
167 dst_format = PIPE_FORMAT_R8_UINT;
168 } else {
169 dst_format = PIPE_FORMAT_RGBA8888_UINT;
170 }
171
172 /* Initialize the surface. */
173 struct pipe_surface dst_tmpl = {
174 .u.tex = {
175 .level = info->dst.level,
176 .first_layer = info->dst.box.z,
177 .last_layer = info->dst.box.z,
178 },
179 .format = dst_format,
180 };
181 struct pipe_surface *dst_surf =
182 ctx->create_surface(ctx, &dst->base, &dst_tmpl);
183
184 /* Initialize the sampler view. */
185 struct pipe_sampler_view src_tmpl = {
186 .target = (src->base.target == PIPE_TEXTURE_CUBE_ARRAY) ?
187 PIPE_TEXTURE_2D_ARRAY :
188 src->base.target,
189 .format = src_format,
190 .u.tex = {
191 .first_level = info->src.level,
192 .last_level = info->src.level,
193 .first_layer = 0,
194 .last_layer = (PIPE_TEXTURE_3D ?
195 u_minify(src->base.depth0,
196 info->src.level) - 1 :
197 src->base.array_size - 1),
198 },
199 .swizzle_r = PIPE_SWIZZLE_X,
200 .swizzle_g = PIPE_SWIZZLE_Y,
201 .swizzle_b = PIPE_SWIZZLE_Z,
202 .swizzle_a = PIPE_SWIZZLE_W,
203 };
204 struct pipe_sampler_view *src_view =
205 ctx->create_sampler_view(ctx, &src->base, &src_tmpl);
206
207 v3d_blitter_save(v3d, info->render_condition_enable ?
208 V3D_BLIT_COND : V3D_BLIT);
209 util_blitter_blit_generic(v3d->blitter, dst_surf, &info->dst.box,
210 src_view, &info->src.box,
211 src->base.width0, src->base.height0,
212 PIPE_MASK_R,
213 PIPE_TEX_FILTER_NEAREST,
214 info->scissor_enable ? &info->scissor : NULL,
215 info->alpha_blend, false, 0, NULL);
216
217 pipe_surface_reference(&dst_surf, NULL);
218 pipe_sampler_view_reference(&src_view, NULL);
219
220 info->mask &= ~PIPE_MASK_S;
221 }
222
223 bool
v3d_generate_mipmap(struct pipe_context * pctx,struct pipe_resource * prsc,enum pipe_format format,unsigned int base_level,unsigned int last_level,unsigned int first_layer,unsigned int last_layer)224 v3d_generate_mipmap(struct pipe_context *pctx,
225 struct pipe_resource *prsc,
226 enum pipe_format format,
227 unsigned int base_level,
228 unsigned int last_level,
229 unsigned int first_layer,
230 unsigned int last_layer)
231 {
232 if (format != prsc->format)
233 return false;
234
235 /* We could maybe support looping over layers for array textures, but
236 * we definitely don't support 3D.
237 */
238 if (first_layer != last_layer)
239 return false;
240
241 struct v3d_context *v3d = v3d_context(pctx);
242 struct v3d_screen *screen = v3d->screen;
243 struct v3d_device_info *devinfo = &screen->devinfo;
244
245 return v3d_X(devinfo, tfu)(pctx,
246 prsc, prsc,
247 base_level,
248 base_level, last_level,
249 first_layer, first_layer,
250 true);
251 }
252
253 static void
v3d_tfu_blit(struct pipe_context * pctx,struct pipe_blit_info * info)254 v3d_tfu_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
255 {
256 int dst_width = u_minify(info->dst.resource->width0, info->dst.level);
257 int dst_height = u_minify(info->dst.resource->height0, info->dst.level);
258
259 if ((info->mask & PIPE_MASK_RGBA) == 0)
260 return;
261
262 if (info->scissor_enable ||
263 info->swizzle_enable ||
264 info->dst.box.x != 0 ||
265 info->dst.box.y != 0 ||
266 info->dst.box.width != dst_width ||
267 info->dst.box.height != dst_height ||
268 info->dst.box.depth != 1 ||
269 info->src.box.x != 0 ||
270 info->src.box.y != 0 ||
271 info->src.box.width != info->dst.box.width ||
272 info->src.box.height != info->dst.box.height ||
273 info->src.box.depth != 1) {
274 return;
275 }
276
277 if (info->dst.format != info->src.format)
278 return;
279
280 struct v3d_context *v3d = v3d_context(pctx);
281 struct v3d_screen *screen = v3d->screen;
282 struct v3d_device_info *devinfo = &screen->devinfo;
283
284 if (v3d_X(devinfo, tfu)(pctx, info->dst.resource, info->src.resource,
285 info->src.level,
286 info->dst.level, info->dst.level,
287 info->src.box.z, info->dst.box.z,
288 false)) {
289 info->mask &= ~PIPE_MASK_RGBA;
290 }
291 }
292
293 static struct pipe_surface *
v3d_get_blit_surface(struct pipe_context * pctx,struct pipe_resource * prsc,enum pipe_format format,unsigned level,int16_t layer)294 v3d_get_blit_surface(struct pipe_context *pctx,
295 struct pipe_resource *prsc,
296 enum pipe_format format,
297 unsigned level,
298 int16_t layer)
299 {
300 struct pipe_surface tmpl;
301
302 tmpl.format = format;
303 tmpl.u.tex.level = level;
304 tmpl.u.tex.first_layer = layer;
305 tmpl.u.tex.last_layer = layer;
306
307 return pctx->create_surface(pctx, prsc, &tmpl);
308 }
309
/**
 * Returns true when @size has any of the bits of (@tile_size - 1) set.
 *
 * This is only an alignment test when @tile_size is a power of two.
 */
static bool
is_tile_unaligned(unsigned size, unsigned tile_size)
{
        const unsigned low_bits = tile_size - 1;

        return (size & low_bits) != 0;
}
315
316 static bool
check_tlb_blit_ok(struct v3d_device_info * devinfo,struct pipe_blit_info * info)317 check_tlb_blit_ok(struct v3d_device_info *devinfo, struct pipe_blit_info *info)
318 {
319 if (!info->mask)
320 return false;
321
322 bool is_color_blit = info->mask & PIPE_MASK_RGBA;
323 bool is_depth_blit = info->mask & PIPE_MASK_Z;
324 bool is_stencil_blit = info->mask & PIPE_MASK_S;
325
326 /* We should receive either a depth/stencil blit, or color blit, but
327 * not both.
328 */
329 assert ((is_color_blit && !is_depth_blit && !is_stencil_blit) ||
330 (!is_color_blit && (is_depth_blit || is_stencil_blit)));
331
332 if (info->scissor_enable || info->swizzle_enable)
333 return false;
334
335 if (info->src.box.x != info->dst.box.x ||
336 info->src.box.y != info->dst.box.y ||
337 info->src.box.width != info->dst.box.width ||
338 info->src.box.height != info->dst.box.height)
339 return false;
340
341 if (is_color_blit &&
342 util_format_is_depth_or_stencil(info->dst.format))
343 return false;
344
345 if ((is_depth_blit || is_stencil_blit) &&
346 !util_format_is_depth_or_stencil(info->dst.format))
347 return false;
348
349 if (!v3d_rt_format_supported(devinfo, info->src.format))
350 return false;
351
352 if (v3d_get_rt_format(devinfo, info->src.format) !=
353 v3d_get_rt_format(devinfo, info->dst.format))
354 return false;
355
356 bool is_msaa_resolve = (info->src.resource->nr_samples > 1 &&
357 info->dst.resource->nr_samples < 2);
358
359 if (is_msaa_resolve &&
360 !v3d_format_supports_tlb_msaa_resolve(devinfo, info->src.format))
361 return false;
362
363 return true;
364 }
365
366 /* This checks if we can implement the blit straight from a job that we have
367 * not yet flushed, including MSAA resolves.
368 */
369 static void
v3d_tlb_blit_fast(struct pipe_context * pctx,struct pipe_blit_info * info)370 v3d_tlb_blit_fast(struct pipe_context *pctx, struct pipe_blit_info *info)
371 {
372 struct v3d_context *v3d = v3d_context(pctx);
373 struct v3d_screen *screen = v3d->screen;
374 struct v3d_device_info *devinfo = &screen->devinfo;
375
376 if (!check_tlb_blit_ok(devinfo, info))
377 return;
378
379 /* FIXME: support depth/stencil */
380 if (!(info->mask & PIPE_MASK_RGBA))
381 return;
382
383 /* Can't blit from 1x to 4x since the render target configuration
384 * for the job would not be compatible.
385 */
386 if (info->src.resource->nr_samples < info->dst.resource->nr_samples)
387 return;
388
389 /* Can't blit form RGBX to RGBA since we can't force A=1 on tile
390 * stores.
391 */
392 if (util_format_has_alpha1(info->src.format) &&
393 !util_format_has_alpha1(info->dst.format))
394 return;
395
396 /* Find the job that writes the blit source */
397 struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
398 info->src.resource);
399 if (!entry)
400 return;
401
402 struct v3d_job *job = entry->data;
403 assert(job);
404
405 /* The TLB store will involve the same area and tiles as the job
406 * writing to the resource, so only do this if we are blitting the
407 * full resource and the job is writing the full resource.
408 */
409 int dst_width = u_minify(info->dst.resource->width0, info->dst.level);
410 int dst_height = u_minify(info->dst.resource->height0, info->dst.level);
411 if (info->dst.box.x != 0 || info->dst.box.width != dst_width ||
412 info->dst.box.y != 0 || info->dst.box.height != dst_height ||
413 job->draw_min_x != 0 || job->draw_min_y != 0 ||
414 job->draw_max_x != dst_width || job->draw_max_y != dst_height) {
415 return;
416 }
417
418 /* Blits are specified for single-layered FBOs, if the job that
419 * produces the blit source is multilayered we would attempt to
420 * blit all layers and write out of bounds on the destination.
421 */
422 if (job->num_layers > 1)
423 return;
424
425 /* Find which color attachment in the job is the blit source */
426 int idx = -1;
427 for (int i = 0; i < job->nr_cbufs; i++) {
428 if (!job->cbufs[i] ||
429 job->cbufs[i]->texture != info->src.resource) {
430 continue;
431 }
432 idx = i;
433 break;
434 }
435
436 if (idx < 0)
437 return;
438
439 struct pipe_surface *dbuf =
440 v3d_get_blit_surface(pctx, info->dst.resource,
441 info->dst.format, info->dst.level,
442 info->dst.box.z);
443
444 /* The job's RT setup must be compatible with the blit buffer. */
445 struct v3d_surface *ssurf = v3d_surface(job->cbufs[idx]);
446 struct v3d_surface *rsurf = v3d_surface(dbuf);
447 if (ssurf->internal_bpp < rsurf->internal_bpp)
448 return;
449 if (ssurf->internal_type != rsurf->internal_type)
450 return;
451
452 MESA_TRACE_FUNC();
453
454 /* If we had any other jobs writing to the blit dst we should submit
455 * them now before we blit.
456 *
457 * FIXME: We could just drop these jobs completely if they are
458 * rendering a subset of the resource being blit here.
459 */
460 v3d_flush_jobs_writing_resource(v3d, info->dst.resource,
461 V3D_FLUSH_DEFAULT, false);
462
463 /* Program the job to blit from the TLB into the destination buffer */
464 info->mask &= ~PIPE_MASK_RGBA;
465 job->blit_tlb |= PIPE_CLEAR_COLOR0 << idx;
466 job->dbuf = dbuf;
467 v3d_job_add_write_resource(job, info->dst.resource);
468
469 /* Submit the job immediately, since otherwise we could accumulate
470 * draw calls happening after the blit.
471 */
472 v3d_job_submit(v3d, job);
473 }
474
475 static void
v3d_tlb_blit(struct pipe_context * pctx,struct pipe_blit_info * info)476 v3d_tlb_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
477 {
478 struct v3d_context *v3d = v3d_context(pctx);
479 struct v3d_screen *screen = v3d->screen;
480 struct v3d_device_info *devinfo = &screen->devinfo;
481
482 if (!check_tlb_blit_ok(devinfo, info))
483 return;
484
485 MESA_TRACE_FUNC();
486
487 v3d_flush_jobs_writing_resource(v3d, info->src.resource, V3D_FLUSH_DEFAULT, false);
488
489 struct pipe_surface *dst_surf =
490 v3d_get_blit_surface(pctx, info->dst.resource, info->dst.format, info->dst.level, info->dst.box.z);
491 struct pipe_surface *src_surf =
492 v3d_get_blit_surface(pctx, info->src.resource, info->src.format, info->src.level, info->src.box.z);
493
494 bool is_color_blit = info->mask & PIPE_MASK_RGBA;
495 bool is_depth_blit = info->mask & PIPE_MASK_Z;
496 bool is_stencil_blit = info->mask & PIPE_MASK_S;
497
498 struct pipe_surface *surfaces[V3D_MAX_DRAW_BUFFERS] = { 0 };
499 if (is_color_blit)
500 surfaces[0] = dst_surf;
501
502 bool msaa = (info->src.resource->nr_samples > 1 ||
503 info->dst.resource->nr_samples > 1);
504
505 bool double_buffer = false;
506
507 uint32_t tile_width, tile_height, max_bpp;
508 v3d_get_tile_buffer_size(devinfo, msaa, double_buffer,
509 is_color_blit ? 1 : 0, surfaces, src_surf,
510 &tile_width, &tile_height, &max_bpp);
511
512 int dst_surface_width = u_minify(info->dst.resource->width0,
513 info->dst.level);
514 int dst_surface_height = u_minify(info->dst.resource->height0,
515 info->dst.level);
516 if (is_tile_unaligned(info->dst.box.x, tile_width) ||
517 is_tile_unaligned(info->dst.box.y, tile_height) ||
518 (is_tile_unaligned(info->dst.box.width, tile_width) &&
519 info->dst.box.x + info->dst.box.width != dst_surface_width) ||
520 (is_tile_unaligned(info->dst.box.height, tile_height) &&
521 info->dst.box.y + info->dst.box.height != dst_surface_height)) {
522 pipe_surface_reference(&dst_surf, NULL);
523 pipe_surface_reference(&src_surf, NULL);
524 return;
525 }
526
527 struct v3d_job *job = v3d_get_job(v3d,
528 is_color_blit ? 1u : 0u,
529 surfaces,
530 is_color_blit ? NULL : dst_surf,
531 src_surf);
532 job->msaa = msaa;
533 job->double_buffer = double_buffer;
534 job->can_use_double_buffer = !job->msaa && V3D_DBG(DOUBLE_BUFFER);
535 job->internal_bpp = max_bpp;
536 job->draw_min_x = info->dst.box.x;
537 job->draw_min_y = info->dst.box.y;
538 job->draw_max_x = info->dst.box.x + info->dst.box.width;
539 job->draw_max_y = info->dst.box.y + info->dst.box.height;
540 job->scissor.disabled = false;
541
542 /* The simulator complains if we do a TLB load from a source with a
543 * stride that is smaller than the destination's, so we program the
544 * 'frame region' to match the smallest dimensions of the two surfaces.
545 * This should be fine because we only get here if the src and dst boxes
546 * match, so we know the blit involves the same tiles on both surfaces.
547 */
548 job->draw_width = MIN2(dst_surf->width, src_surf->width);
549 job->draw_height = MIN2(dst_surf->height, src_surf->height);
550
551 job->tile_desc.width = tile_width;
552 job->tile_desc.height = tile_height;
553 job->tile_desc.draw_x = DIV_ROUND_UP(job->draw_width,
554 job->tile_desc.width);
555 job->tile_desc.draw_y = DIV_ROUND_UP(job->draw_height,
556 job->tile_desc.height);
557
558 job->needs_flush = true;
559 job->num_layers = info->dst.box.depth;
560
561 job->store = 0;
562 if (is_color_blit) {
563 job->store |= PIPE_CLEAR_COLOR0;
564 info->mask &= ~PIPE_MASK_RGBA;
565 }
566 if (is_depth_blit) {
567 job->store |= PIPE_CLEAR_DEPTH;
568 info->mask &= ~PIPE_MASK_Z;
569 }
570 if (is_stencil_blit){
571 job->store |= PIPE_CLEAR_STENCIL;
572 info->mask &= ~PIPE_MASK_S;
573 }
574
575 v3d_X(devinfo, start_binning)(v3d, job);
576
577 v3d_job_submit(v3d, job);
578
579 pipe_surface_reference(&dst_surf, NULL);
580 pipe_surface_reference(&src_surf, NULL);
581 }
582
583 /**
584 * Creates the VS of the custom blit shader to convert YUV plane from
585 * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format.
586 * This vertex shader is mostly a pass-through VS.
587 */
588 static void *
v3d_get_sand8_vs(struct pipe_context * pctx)589 v3d_get_sand8_vs(struct pipe_context *pctx)
590 {
591 struct v3d_context *v3d = v3d_context(pctx);
592 struct pipe_screen *pscreen = pctx->screen;
593
594 if (v3d->sand8_blit_vs)
595 return v3d->sand8_blit_vs;
596
597 const struct nir_shader_compiler_options *options =
598 pscreen->get_compiler_options(pscreen,
599 PIPE_SHADER_IR_NIR,
600 PIPE_SHADER_VERTEX);
601
602 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
603 options,
604 "sand8_blit_vs");
605
606 const struct glsl_type *vec4 = glsl_vec4_type();
607 nir_variable *pos_in = nir_variable_create(b.shader,
608 nir_var_shader_in,
609 vec4, "pos");
610
611 nir_variable *pos_out = nir_variable_create(b.shader,
612 nir_var_shader_out,
613 vec4, "gl_Position");
614 pos_out->data.location = VARYING_SLOT_POS;
615 nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);
616
617 v3d->sand8_blit_vs = pipe_shader_from_nir(pctx, b.shader);
618
619 return v3d->sand8_blit_vs;
620 }
621 /**
622 * Creates the FS of the custom blit shader to convert YUV plane from
623 * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format.
624 * The result texture is equivalent to a chroma (cpp=2) or luma (cpp=1)
625 * plane for a NV12 format without the SAND modifier.
626 */
627 static void *
v3d_get_sand8_fs(struct pipe_context * pctx,int cpp)628 v3d_get_sand8_fs(struct pipe_context *pctx, int cpp)
629 {
630 struct v3d_context *v3d = v3d_context(pctx);
631 struct pipe_screen *pscreen = pctx->screen;
632 struct pipe_shader_state **cached_shader;
633 const char *name;
634
635 if (cpp == 1) {
636 cached_shader = &v3d->sand8_blit_fs_luma;
637 name = "sand8_blit_fs_luma";
638 } else {
639 cached_shader = &v3d->sand8_blit_fs_chroma;
640 name = "sand8_blit_fs_chroma";
641 }
642
643 if (*cached_shader)
644 return *cached_shader;
645
646 const struct nir_shader_compiler_options *options =
647 pscreen->get_compiler_options(pscreen,
648 PIPE_SHADER_IR_NIR,
649 PIPE_SHADER_FRAGMENT);
650
651 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
652 options, "%s", name);
653 b.shader->info.num_ubos = 1;
654 b.shader->num_outputs = 1;
655 b.shader->num_inputs = 1;
656 b.shader->num_uniforms = 1;
657
658 const struct glsl_type *vec4 = glsl_vec4_type();
659
660 const struct glsl_type *glsl_uint = glsl_uint_type();
661
662 nir_variable *color_out =
663 nir_variable_create(b.shader, nir_var_shader_out,
664 vec4, "f_color");
665 color_out->data.location = FRAG_RESULT_COLOR;
666
667 nir_variable *pos_in =
668 nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");
669 pos_in->data.location = VARYING_SLOT_POS;
670 nir_def *pos = nir_load_var(&b, pos_in);
671
672 nir_def *zero = nir_imm_int(&b, 0);
673 nir_def *one = nir_imm_int(&b, 1);
674 nir_def *two = nir_imm_int(&b, 2);
675 nir_def *six = nir_imm_int(&b, 6);
676 nir_def *seven = nir_imm_int(&b, 7);
677 nir_def *eight = nir_imm_int(&b, 8);
678
679 nir_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
680 nir_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));
681
682 nir_variable *stride_in =
683 nir_variable_create(b.shader, nir_var_uniform, glsl_uint,
684 "sand8_stride");
685 nir_def *stride =
686 nir_load_uniform(&b, 1, 32, zero,
687 .base = stride_in->data.driver_location,
688 .range = 4,
689 .dest_type = nir_type_uint32);
690
691 nir_def *x_offset;
692 nir_def *y_offset;
693
694 /* UIF tiled format is composed by UIF blocks, Each block has
695 * four 64 byte microtiles. Inside each microtile pixels are stored
696 * in raster format. But microtiles have different dimensions
697 * based in the bits per pixel of the image.
698 *
699 * 8bpp microtile dimensions are 8x8
700 * 16bpp microtile dimensions are 8x4
701 * 32bpp microtile dimensions are 4x4
702 *
703 * As we are reading and writing with 32bpp to optimize
704 * the number of texture operations during the blit, we need
705 * to adjust the offsets were we read and write as data will
706 * be later read using 8bpp (luma) and 16bpp (chroma).
707 *
708 * For chroma 8x4 16bpp raster order is compatible with 4x4
709 * 32bpp. In both layouts each line has 8*2 == 4*4 == 16 bytes.
710 * But luma 8x8 8bpp raster order is not compatible
711 * with 4x4 32bpp. 8bpp has 8 bytes per line, and 32bpp has
712 * 16 bytes per line. So if we read a 8bpp texture that was
713 * written as 32bpp texture. Bytes would be misplaced.
714 *
715 * inter/intra_utile_x_offsets takes care of mapping the offsets
716 * between microtiles to deal with this issue for luma planes.
717 */
718 if (cpp == 1) {
719 nir_def *intra_utile_x_offset =
720 nir_ishl(&b, nir_iand_imm(&b, x, 1), two);
721 nir_def *inter_utile_x_offset =
722 nir_ishl(&b, nir_iand_imm(&b, x, 60), one);
723 nir_def *stripe_offset=
724 nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 6),
725 stride),
726 seven);
727
728 x_offset = nir_iadd(&b, stripe_offset,
729 nir_iadd(&b, intra_utile_x_offset,
730 inter_utile_x_offset));
731 y_offset = nir_iadd(&b,
732 nir_ishl(&b, nir_iand_imm(&b, x, 2), six),
733 nir_ishl(&b, y, eight));
734 } else {
735 nir_def *stripe_offset=
736 nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 5),
737 stride),
738 seven);
739 x_offset = nir_iadd(&b, stripe_offset,
740 nir_ishl(&b, nir_iand_imm(&b, x, 31), two));
741 y_offset = nir_ishl(&b, y, seven);
742 }
743 nir_def *ubo_offset = nir_iadd(&b, x_offset, y_offset);
744 nir_def *load =
745 nir_load_ubo(&b, 1, 32, zero, ubo_offset,
746 .align_mul = 4,
747 .align_offset = 0,
748 .range_base = 0,
749 .range = ~0);
750
751 nir_def *output = nir_unpack_unorm_4x8(&b, load);
752
753 nir_store_var(&b, color_out,
754 output,
755 0xF);
756
757
758 *cached_shader = pipe_shader_from_nir(pctx, b.shader);
759
760 return *cached_shader;
761 }
762
763 /**
764 * Turns NV12 with SAND8 format modifier from raster-order with interleaved
765 * luma and chroma 128-byte-wide-columns to tiled format for luma and chroma.
766 *
767 * This implementation is based on vc4_yuv_blit.
768 */
769 static void
v3d_sand8_blit(struct pipe_context * pctx,struct pipe_blit_info * info)770 v3d_sand8_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
771 {
772 struct v3d_context *v3d = v3d_context(pctx);
773 struct v3d_resource *src = v3d_resource(info->src.resource);
774 ASSERTED struct v3d_resource *dst = v3d_resource(info->dst.resource);
775
776 if (!src->sand_col128_stride)
777 return;
778 if (src->tiled)
779 return;
780 if (src->base.format != PIPE_FORMAT_R8_UNORM &&
781 src->base.format != PIPE_FORMAT_R8G8_UNORM)
782 return;
783 if (!(info->mask & PIPE_MASK_RGBA))
784 return;
785
786 assert(dst->base.format == src->base.format);
787 assert(dst->tiled);
788
789 assert(info->src.box.x == 0 && info->dst.box.x == 0);
790 assert(info->src.box.y == 0 && info->dst.box.y == 0);
791 assert(info->src.box.width == info->dst.box.width);
792 assert(info->src.box.height == info->dst.box.height);
793
794 MESA_TRACE_FUNC();
795
796 v3d_blitter_save(v3d, info->render_condition_enable ?
797 V3D_BLIT_COND : V3D_BLIT);
798
799 struct pipe_surface dst_tmpl;
800 util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
801 info->dst.level, info->dst.box.z);
802 /* Although the src textures are cpp=1 or cpp=2, the dst texture
803 * uses a cpp=4 dst texture. So, all read/write texture ops will
804 * be done using 32-bit read and writes.
805 */
806 dst_tmpl.format = PIPE_FORMAT_R8G8B8A8_UNORM;
807 struct pipe_surface *dst_surf =
808 pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
809 if (!dst_surf) {
810 fprintf(stderr, "Failed to create YUV dst surface\n");
811 util_blitter_unset_running_flag(v3d->blitter);
812 return;
813 }
814
815 uint32_t sand8_stride = src->sand_col128_stride;
816
817 /* Adjust the dimensions of dst luma/chroma to match src
818 * size now we are using a cpp=4 format. Next dimension take into
819 * account the UIF microtile layouts.
820 */
821 dst_surf->width = align(dst_surf->width, 8) / 2;
822 if (src->cpp == 1)
823 dst_surf->height /= 2;
824
825 /* Set the constant buffer. */
826 struct pipe_constant_buffer cb_uniforms = {
827 .user_buffer = &sand8_stride,
828 .buffer_size = sizeof(sand8_stride),
829 };
830
831 pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false,
832 &cb_uniforms);
833 struct pipe_constant_buffer saved_fs_cb1 = { 0 };
834 pipe_resource_reference(&saved_fs_cb1.buffer,
835 v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1].buffer);
836 memcpy(&saved_fs_cb1, &v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1],
837 sizeof(struct pipe_constant_buffer));
838 struct pipe_constant_buffer cb_src = {
839 .buffer = info->src.resource,
840 .buffer_offset = src->slices[info->src.level].offset,
841 .buffer_size = (src->bo->size -
842 src->slices[info->src.level].offset),
843 };
844 pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false,
845 &cb_src);
846 /* Unbind the textures, to make sure we don't try to recurse into the
847 * shadow blit.
848 */
849 pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false, NULL);
850 pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
851
852 util_blitter_custom_shader(v3d->blitter, dst_surf,
853 v3d_get_sand8_vs(pctx),
854 v3d_get_sand8_fs(pctx, src->cpp));
855
856 util_blitter_restore_textures(v3d->blitter);
857 util_blitter_restore_constant_buffer_state(v3d->blitter);
858
859 /* Restore cb1 (util_blitter doesn't handle this one). */
860 pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, true,
861 &saved_fs_cb1);
862
863 pipe_surface_reference(&dst_surf, NULL);
864
865 info->mask &= ~PIPE_MASK_RGBA;
866 }
867
868
869 /**
870 * Creates the VS of the custom blit shader to convert YUV plane from
871 * the P030 format with BROADCOM_SAND_COL128 modifier to UIF tiled P010
872 * format.
873 * This vertex shader is mostly a pass-through VS.
874 */
875 static void *
v3d_get_sand30_vs(struct pipe_context * pctx)876 v3d_get_sand30_vs(struct pipe_context *pctx)
877 {
878 struct v3d_context *v3d = v3d_context(pctx);
879 struct pipe_screen *pscreen = pctx->screen;
880
881 if (v3d->sand30_blit_vs)
882 return v3d->sand30_blit_vs;
883
884 const struct nir_shader_compiler_options *options =
885 pscreen->get_compiler_options(pscreen,
886 PIPE_SHADER_IR_NIR,
887 PIPE_SHADER_VERTEX);
888
889 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
890 options,
891 "sand30_blit_vs");
892
893 const struct glsl_type *vec4 = glsl_vec4_type();
894 nir_variable *pos_in = nir_variable_create(b.shader,
895 nir_var_shader_in,
896 vec4, "pos");
897
898 nir_variable *pos_out = nir_variable_create(b.shader,
899 nir_var_shader_out,
900 vec4, "gl_Position");
901 pos_out->data.location = VARYING_SLOT_POS;
902 nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);
903
904 v3d->sand30_blit_vs = pipe_shader_from_nir(pctx, b.shader);
905
906 return v3d->sand30_blit_vs;
907 }
908
909 /**
910 * Given an uvec2 value with rgb10a2 components, it extracts four 10-bit
911 * components, then converts them from unorm10 to unorm16 and returns them
912 * in an uvec4. The start parameter defines where the sequence of 4 values
913 * begins.
914 */
915 static nir_def *
extract_unorm_2xrgb10a2_component_to_4xunorm16(nir_builder * b,nir_def * value,nir_def * start)916 extract_unorm_2xrgb10a2_component_to_4xunorm16(nir_builder *b,
917 nir_def *value,
918 nir_def *start)
919 {
920 const unsigned mask = BITFIELD_MASK(10);
921
922 nir_def *shiftw0 = nir_imul_imm(b, start, 10);
923 nir_def *word0 = nir_iand_imm(b, nir_channel(b, value, 0),
924 BITFIELD_MASK(30));
925 nir_def *finalword0 = nir_ushr(b, word0, shiftw0);
926 nir_def *word1 = nir_channel(b, value, 1);
927 nir_def *shiftw0tow1 = nir_isub_imm(b, 30, shiftw0);
928 nir_def *word1toword0 = nir_ishl(b, word1, shiftw0tow1);
929 finalword0 = nir_ior(b, finalword0, word1toword0);
930 nir_def *finalword1 = nir_ushr(b, word1, shiftw0);
931
932 nir_def *val0 = nir_ishl_imm(b, nir_iand_imm(b, finalword0,
933 mask), 6);
934 nir_def *val1 = nir_ishr_imm(b, nir_iand_imm(b, finalword0,
935 mask << 10), 4);
936 nir_def *val2 = nir_ishr_imm(b, nir_iand_imm(b, finalword0,
937 mask << 20), 14);
938 nir_def *val3 = nir_ishl_imm(b, nir_iand_imm(b, finalword1,
939 mask), 6);
940
941 return nir_vec4(b, val0, val1, val2, val3);
942 }
943
944 /**
945 * Creates the FS of the custom blit shader to convert YUV plane from
946 * the P030 format with BROADCOM_SAND_COL128 modifier to UIF tiled P10
947 * format a 16-bit representation per component.
948 *
949 * The result texture is equivalent to a chroma (cpp=4) or luma (cpp=2)
950 * plane for a P010 format without the SAND128 modifier.
951 */
952 static void *
v3d_get_sand30_fs(struct pipe_context * pctx)953 v3d_get_sand30_fs(struct pipe_context *pctx)
954 {
955 struct v3d_context *v3d = v3d_context(pctx);
956 struct pipe_screen *pscreen = pctx->screen;
957
958 if (v3d->sand30_blit_fs)
959 return v3d->sand30_blit_fs;
960
961 const struct nir_shader_compiler_options *options =
962 pscreen->get_compiler_options(pscreen,
963 PIPE_SHADER_IR_NIR,
964 PIPE_SHADER_FRAGMENT);
965
966 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
967 options,
968 "sand30_blit_fs");
969 b.shader->info.num_ubos = 1;
970 b.shader->num_outputs = 1;
971 b.shader->num_inputs = 1;
972 b.shader->num_uniforms = 1;
973
974 const struct glsl_type *vec4 = glsl_vec4_type();
975
976 const struct glsl_type *glsl_uint = glsl_uint_type();
977 const struct glsl_type *glsl_uvec4 = glsl_vector_type(GLSL_TYPE_UINT,
978 4);
979
980 nir_variable *color_out = nir_variable_create(b.shader,
981 nir_var_shader_out,
982 glsl_uvec4, "f_color");
983 color_out->data.location = FRAG_RESULT_COLOR;
984
985 nir_variable *pos_in =
986 nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");
987 pos_in->data.location = VARYING_SLOT_POS;
988 nir_def *pos = nir_load_var(&b, pos_in);
989
990 nir_def *zero = nir_imm_int(&b, 0);
991 nir_def *three = nir_imm_int(&b, 3);
992
993 /* With a SAND128 stripe, in 128-bytes with rgb10a2 format we have 96
994 * 10-bit values. So, it represents 96 pixels for Y plane and 48 pixels
995 * for UV frame, but as we are reading 4 10-bit-values at a time we
996 * will have 24 groups (pixels) of 4 10-bit values.
997 */
998 uint32_t pixels_stripe = 24;
999
1000 nir_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
1001 nir_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));
1002
1003 /* UIF tiled format is composed by UIF blocks. Each block has four 64
1004 * byte microtiles. Inside each microtile pixels are stored in raster
1005 * format. But microtiles have different dimensions based in the bits
1006 * per pixel of the image.
1007 *
1008 * 16bpp microtile dimensions are 8x4
1009 * 32bpp microtile dimensions are 4x4
1010 * 64bpp microtile dimensions are 4x2
1011 *
1012 * As we are reading and writing with 64bpp to optimize the number of
1013 * texture operations during the blit, we adjust the offsets so when
1014 * the microtile is sampled using the 16bpp (luma) and the 32bpp
1015 * (chroma) the expected pixels are in the correct position, that
1016 * would be different if we were using a 64bpp sampling.
1017 *
1018 * For luma 8x4 16bpp and chroma 4x4 32bpp luma raster order is
1019 * incompatible with 4x2 64bpp. 16bpp has 16 bytes per line, 32bpp has
1020 * also 16byte per line. But 64bpp has 32 bytes per line. So if we
1021 * read a 16bpp or 32bpp texture that was written as 64bpp texture,
1022 * pixels would be misplaced.
1023 *
1024 * inter/intra_utile_x_offsets takes care of mapping the offsets
1025 * between microtiles to deal with this issue for luma and chroma
1026 * planes.
1027 *
1028 * We reduce the luma and chroma planes to the same blit case
1029 * because 16bpp and 32bpp have compatible microtile raster layout.
1030 * So just doubling the width of the chroma plane before calling the
1031 * blit makes them equivalent.
1032 */
1033 nir_variable *stride_in =
1034 nir_variable_create(b.shader, nir_var_uniform,
1035 glsl_uint, "sand30_stride");
1036 nir_def *stride =
1037 nir_load_uniform(&b, 1, 32, zero,
1038 .base = stride_in->data.driver_location,
1039 .range = 4,
1040 .dest_type = nir_type_uint32);
1041
1042 nir_def *real_x = nir_ior(&b, nir_iand_imm(&b, x, 1),
1043 nir_ishl_imm(&b,nir_ushr_imm(&b, x, 2),
1044 1));
1045 nir_def *x_pos_in_stripe = nir_umod_imm(&b, real_x, pixels_stripe);
1046 nir_def *component = nir_umod(&b, real_x, three);
1047 nir_def *intra_utile_x_offset = nir_ishl_imm(&b, component, 2);
1048
1049 nir_def *inter_utile_x_offset =
1050 nir_ishl_imm(&b, nir_udiv_imm(&b, x_pos_in_stripe, 3), 4);
1051
1052 nir_def *stripe_offset=
1053 nir_ishl_imm(&b,
1054 nir_imul(&b,
1055 nir_udiv_imm(&b, real_x, pixels_stripe),
1056 stride),
1057 7);
1058
1059 nir_def *x_offset = nir_iadd(&b, stripe_offset,
1060 nir_iadd(&b, intra_utile_x_offset,
1061 inter_utile_x_offset));
1062 nir_def *y_offset =
1063 nir_iadd(&b, nir_ishl_imm(&b, nir_iand_imm(&b, x, 2), 6),
1064 nir_ishl_imm(&b, y, 8));
1065 nir_def *ubo_offset = nir_iadd(&b, x_offset, y_offset);
1066
1067 nir_def *load = nir_load_ubo(&b, 2, 32, zero, ubo_offset,
1068 .align_mul = 8,
1069 .align_offset = 0,
1070 .range_base = 0,
1071 .range = ~0);
1072 nir_def *output =
1073 extract_unorm_2xrgb10a2_component_to_4xunorm16(&b, load,
1074 component);
1075 nir_store_var(&b, color_out,
1076 output,
1077 0xf);
1078
1079 v3d->sand30_blit_fs = pipe_shader_from_nir(pctx, b.shader);
1080
1081 return v3d->sand30_blit_fs;
1082 }
1083
1084 /**
1085 * Turns P030 with SAND30 format modifier from raster-order with interleaved
1086 * luma and chroma 128-byte-wide-columns to a P010 UIF tiled format for luma
1087 * and chroma.
1088 */
1089 static void
v3d_sand30_blit(struct pipe_context * pctx,struct pipe_blit_info * info)1090 v3d_sand30_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
1091 {
1092 struct v3d_context *v3d = v3d_context(pctx);
1093 struct v3d_resource *src = v3d_resource(info->src.resource);
1094 ASSERTED struct v3d_resource *dst = v3d_resource(info->dst.resource);
1095
1096 if (!src->sand_col128_stride)
1097 return;
1098 if (src->tiled)
1099 return;
1100 if (src->base.format != PIPE_FORMAT_R16_UNORM &&
1101 src->base.format != PIPE_FORMAT_R16G16_UNORM)
1102 return;
1103 if (!(info->mask & PIPE_MASK_RGBA))
1104 return;
1105 if (info->swizzle_enable)
1106 return;
1107 assert(dst->base.format == src->base.format);
1108 assert(dst->tiled);
1109
1110 assert(info->src.box.x == 0 && info->dst.box.x == 0);
1111 assert(info->src.box.y == 0 && info->dst.box.y == 0);
1112 assert(info->src.box.width == info->dst.box.width);
1113 assert(info->src.box.height == info->dst.box.height);
1114
1115 MESA_TRACE_FUNC();
1116
1117 v3d_blitter_save(v3d, info->render_condition_enable ?
1118 V3D_BLIT_COND : V3D_BLIT);
1119
1120 struct pipe_surface dst_tmpl;
1121 util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
1122 info->dst.level, info->dst.box.z);
1123
1124 dst_tmpl.format = PIPE_FORMAT_R16G16B16A16_UINT;
1125
1126 struct pipe_surface *dst_surf =
1127 pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
1128 if (!dst_surf) {
1129 fprintf(stderr, "Failed to create YUV dst surface\n");
1130 util_blitter_unset_running_flag(v3d->blitter);
1131 return;
1132 }
1133
1134 uint32_t sand30_stride = src->sand_col128_stride;
1135
1136 /* Adjust the dimensions of dst luma/chroma to match src
1137 * size now we are using a cpp=8 format. Next dimension take into
1138 * account the UIF microtile layouts.
1139 */
1140 dst_surf->height /= 2;
1141 dst_surf->width = align(dst_surf->width, 8);
1142 if (src->cpp == 2)
1143 dst_surf->width /= 2;
1144 /* Set the constant buffer. */
1145 struct pipe_constant_buffer cb_uniforms = {
1146 .user_buffer = &sand30_stride,
1147 .buffer_size = sizeof(sand30_stride),
1148 };
1149
1150 pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false,
1151 &cb_uniforms);
1152
1153 struct pipe_constant_buffer saved_fs_cb1 = { 0 };
1154 pipe_resource_reference(&saved_fs_cb1.buffer,
1155 v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1].buffer);
1156 memcpy(&saved_fs_cb1, &v3d->constbuf[PIPE_SHADER_FRAGMENT].cb[1],
1157 sizeof(struct pipe_constant_buffer));
1158 struct pipe_constant_buffer cb_src = {
1159 .buffer = info->src.resource,
1160 .buffer_offset = src->slices[info->src.level].offset,
1161 .buffer_size = (src->bo->size -
1162 src->slices[info->src.level].offset),
1163 };
1164 pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false,
1165 &cb_src);
1166 /* Unbind the textures, to make sure we don't try to recurse into the
1167 * shadow blit.
1168 */
1169 pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false,
1170 NULL);
1171 pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
1172
1173 util_blitter_custom_shader(v3d->blitter, dst_surf,
1174 v3d_get_sand30_vs(pctx),
1175 v3d_get_sand30_fs(pctx));
1176
1177 util_blitter_restore_textures(v3d->blitter);
1178 util_blitter_restore_constant_buffer_state(v3d->blitter);
1179
1180 /* Restore cb1 (util_blitter doesn't handle this one). */
1181 pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, true,
1182 &saved_fs_cb1);
1183 pipe_surface_reference(&dst_surf, NULL);
1184
1185 info->mask &= ~PIPE_MASK_RGBA;
1186 return;
1187 }
1188
1189 /* Optimal hardware path for blitting pixels.
1190 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
1191 */
1192 void
v3d_blit(struct pipe_context * pctx,const struct pipe_blit_info * blit_info)1193 v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
1194 {
1195 struct v3d_context *v3d = v3d_context(pctx);
1196 struct pipe_blit_info info = *blit_info;
1197
1198 if (info.render_condition_enable && !v3d_render_condition_check(v3d))
1199 return;
1200
1201 MESA_TRACE_FUNC();
1202
1203 v3d_sand30_blit(pctx, &info);
1204
1205 v3d_sand8_blit(pctx, &info);
1206
1207 v3d_tlb_blit_fast(pctx, &info);
1208
1209 v3d_tfu_blit(pctx, &info);
1210
1211 v3d_tlb_blit(pctx, &info);
1212
1213 v3d_stencil_blit(pctx, &info);
1214
1215 v3d_render_blit(pctx, &info);
1216
1217 /* Flush our blit jobs immediately. They're unlikely to get reused by
1218 * normal drawing or other blits, and without flushing we can easily
1219 * run into unexpected OOMs when blits are used for a large series of
1220 * texture uploads before using the textures.
1221 */
1222 v3d_flush_jobs_writing_resource(v3d, info.dst.resource,
1223 V3D_FLUSH_DEFAULT, false);
1224 }
1225