1 /**************************************************************************
2 *
3 * Copyright 2019 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
 * Authors: James Zhu <james.zhu@amd.com>
27 *
28 **************************************************************************/
29
30 #include <assert.h>
31
32 #include "nir/nir_builder.h"
33 #include "vl_compositor_cs.h"
34
/* Per-layer draw parameters; packed into the compute shader UBO by
 * set_viewport() (see the GLSL "ubo" layout in cs_create_shader()). */
struct cs_viewport {
   float scale_x;         /* dst pixel -> src texel scale */
   float scale_y;
   struct u_rect area;    /* destination rectangle to draw */
   float crop_x;          /* src: crop offset in texels */
   float crop_y;
   int translate_x;       /* dst: pixel translation */
   int translate_y;
   float sampler0_w;      /* dimensions of the first (luma) sampler */
   float sampler0_h;
   float clamp_x;         /* max luma texel coordinates */
   float clamp_y;
   float chroma_clamp_x;  /* max chroma texel coordinates */
   float chroma_clamp_y;
   float chroma_offset_x; /* chroma siting offset, see chroma_offset_x/y() */
   float chroma_offset_y;
};
52
/* State accumulated while building one compositor compute shader. */
struct cs_shader {
   nir_builder b;             /* builder owning the shader under construction */
   const char *name;          /* shader name used for debugging */
   bool array;                /* sample 2D array textures instead of RECT */
   unsigned num_samplers;     /* number of entries used in samplers[] */
   nir_variable *samplers[3]; /* up to Y/U/V input samplers, bindings 0..2 */
   nir_variable *image;       /* output image, binding 0 */
   nir_def *params[8];        /* UBO contents preloaded as eight vec4s */
   nir_def *fone;             /* immediate 1.0f */
   nir_def *fzero;            /* immediate 0.0f */
};
64
/* Flags controlling coordinate computation in cs_tex_coords() and helpers. */
enum coords_flags {
   COORDS_LUMA          = 0x0, /* luma plane coordinates */
   COORDS_CHROMA        = 0x1, /* apply chroma subsampling / chroma clamps */
   COORDS_CHROMA_OFFSET = 0x2, /* apply the chroma siting offset */
};
70
/**
 * Common prologue for every compositor compute shader.
 *
 * Creates a new NIR compute shader (8x8x1 workgroup) in s->b, declares the
 * input samplers, the output image and one UBO whose eight vec4s are
 * preloaded into s->params, and returns the global invocation ID, i.e. the
 * pixel coordinates this invocation operates on.
 *
 * GLSL equivalent:

   #version 450

   layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
   layout (binding = 0) uniform sampler2DRect samplers[3]; // or sampler2DArray
   layout (binding = 0) uniform image2D image;

   layout (std140, binding = 0) uniform ubo
   {
      vec4 csc_mat[3];        // params[0-2]
      float luma_min;         // params[3].x
      float luma_max;         // params[3].y
      vec2 scale;             // params[3].zw
      vec2 crop;              // params[4].xy
      ivec2 translate;        // params[4].zw
      vec2 sampler0_wh;       // params[5].xy
      vec2 subsample_ratio;   // params[5].zw
      vec2 coord_clamp;       // params[6].xy
      vec2 chroma_clamp;      // params[6].zw
      vec2 chroma_offset;     // params[7].xy
   };

   void main()
   {
      ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
   }
 */
static nir_def *cs_create_shader(struct vl_compositor *c, struct cs_shader *s)
{
   /* Array shaders use normalized 2D array samplers, others unnormalized
    * RECT samplers. */
   enum glsl_sampler_dim sampler_dim = s->array ? GLSL_SAMPLER_DIM_2D : GLSL_SAMPLER_DIM_RECT;
   const struct glsl_type *sampler_type =
      glsl_sampler_type(sampler_dim, /*is_shadow*/ false, s->array, GLSL_TYPE_FLOAT);
   const struct glsl_type *image_type =
      glsl_image_type(GLSL_SAMPLER_DIM_2D, /*is_array*/ false, GLSL_TYPE_FLOAT);
   const nir_shader_compiler_options *options =
      c->pipe->screen->get_compiler_options(c->pipe->screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);

   s->b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "vl:%s", s->name);
   nir_builder *b = &s->b;
   b->shader->info.workgroup_size[0] = 8;
   b->shader->info.workgroup_size[1] = 8;
   b->shader->info.workgroup_size[2] = 1;
   b->shader->info.num_ubos = 1;
   b->shader->num_uniforms = ARRAY_SIZE(s->params);

   /* Preload all UBO constants; each vec4 is 16 bytes apart. */
   nir_def *zero = nir_imm_int(b, 0);
   for (unsigned i = 0; i < b->shader->num_uniforms; ++i)
      s->params[i] = nir_load_ubo(b, 4, 32, zero, nir_imm_int(b, i * 16), .align_mul = 4, .range = ~0);

   for (unsigned i = 0; i < s->num_samplers; ++i) {
      s->samplers[i] = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "sampler");
      s->samplers[i]->data.binding = i;
      BITSET_SET(b->shader->info.textures_used, i);
      BITSET_SET(b->shader->info.samplers_used, i);
   }

   s->image = nir_variable_create(b->shader, nir_var_image, image_type, "image");
   s->image->data.binding = 0;
   BITSET_SET(b->shader->info.images_used, 0);

   s->fone = nir_imm_float(b, 1.0f);
   s->fzero = nir_imm_float(b, 0.0f);

   /* global_id = workgroup_id * workgroup_size + local_id */
   nir_def *block_ids = nir_load_workgroup_id(b);
   nir_def *local_ids = nir_load_local_invocation_id(b);
   return nir_iadd(b, nir_imul(b, block_ids, nir_imm_ivec3(b, 8, 8, 1)), local_ids);
}
138
cs_create_shader_state(struct vl_compositor * c,struct cs_shader * s)139 static void *cs_create_shader_state(struct vl_compositor *c, struct cs_shader *s)
140 {
141 c->pipe->screen->finalize_nir(c->pipe->screen, s->b.shader);
142
143 struct pipe_compute_state state = {0};
144 state.ir_type = PIPE_SHADER_IR_NIR;
145 state.prog = s->b.shader;
146
147 /* create compute shader */
148 return c->pipe->create_compute_state(c->pipe, &state);
149 }
150
/* return src.xy + params[4].zw (the integer dst translation) */
static inline nir_def *cs_translate(struct cs_shader *s, nir_def *src)
{
   nir_builder *b = &s->b;
   nir_def *translate = nir_channels(b, s->params[4], 0x3 << 2);
   return nir_iadd(b, src, translate);
}
159
/* return src.xy + 0.5 (move to the texel center) */
static inline nir_def *cs_texture_offset(struct cs_shader *s, nir_def *src)
{
   return nir_fadd_imm(&s->b, src, 0.5f);
}
168
/* return src.xy * params[5].zw (the chroma subsample ratio) */
static inline nir_def *cs_chroma_subsampling(struct cs_shader *s, nir_def *src)
{
   nir_builder *b = &s->b;
   nir_def *ratio = nir_channels(b, s->params[5], 0x3 << 2);
   return nir_fmul(b, src, ratio);
}
177
/* return src.xy / params[3].zw (the dst-to-src scale factors) */
static inline nir_def *cs_scale(struct cs_shader *s, nir_def *src)
{
   nir_builder *b = &s->b;
   nir_def *scale = nir_channels(b, s->params[3], 0x3 << 2);
   return nir_fdiv(b, src, scale);
}
186
/*
 * Luma keying: return 1.0 when src lies outside the open interval
 * (params[3].x, params[3].y], i.e. params[3].x >= src || params[3].y < src,
 * and 0.0 otherwise.
 */
static inline nir_def *cs_luma_key(struct cs_shader *s, nir_def *src)
{
   nir_builder *b = &s->b;
   nir_def *below_min = nir_fge(b, nir_channel(b, s->params[3], 0), src);
   nir_def *above_max = nir_flt(b, nir_channel(b, s->params[3], 1), src);
   nir_def *keyed = nir_ior(b, below_min, above_max);
   return nir_b2f32(b, keyed);
}
199
/*
 * Apply the chroma siting offset (params[7].xy): added as-is to chroma
 * coordinates, scaled by -0.5 when applied to luma coordinates.
 */
static inline nir_def *cs_chroma_offset(struct cs_shader *s, nir_def *src, unsigned flags)
{
   nir_builder *b = &s->b;
   nir_def *offset = nir_channels(b, s->params[7], 0x3);

   if (!(flags & COORDS_CHROMA))
      return nir_ffma_imm1(b, offset, -0.5f, src);
   return nir_fadd(b, src, offset);
}
214
/* Clamp coordinates against params[6]: .xy for luma, .zw for chroma. */
static inline nir_def *cs_clamp(struct cs_shader *s, nir_def *src, unsigned flags)
{
   nir_builder *b = &s->b;
   nir_component_mask_t mask = (flags & COORDS_CHROMA) ? (0x3 << 2) : 0x3;
   nir_def *coord_max = nir_channels(b, s->params[6], mask);
   return nir_fmin(b, src, coord_max);
}
229
/*
 * Normalize texel coordinates by dividing by the sampler 0 dimensions
 * (params[5].xy), subsampled first for chroma planes.
 */
static inline nir_def *cs_normalize(struct cs_shader *s, nir_def *src, unsigned flags)
{
   nir_builder *b = &s->b;
   nir_def *size = nir_channels(b, s->params[5], 0x3);

   if (flags & COORDS_CHROMA)
      size = cs_chroma_subsampling(s, size);
   return nir_fdiv(b, src, size);
}
244
/* Add the src crop offset (params[4].xy), subsampled for chroma planes. */
static inline nir_def *cs_crop(struct cs_shader *s, nir_def *src, unsigned flags)
{
   nir_builder *b = &s->b;
   nir_def *offset = nir_channels(b, s->params[4], 0x3);

   if (flags & COORDS_CHROMA)
      offset = cs_chroma_subsampling(s, offset);
   return nir_fadd(b, src, offset);
}
259
/* return dot(src, params[comp]) — one row of the CSC matrix multiply */
static inline nir_def *cs_color_space_conversion(struct cs_shader *s, nir_def *src, unsigned comp)
{
   return nir_fdot4(&s->b, src, s->params[comp]);
}
268
/* Sample samplers[sampler] at coords (.xyz for array targets, .xy otherwise). */
static inline nir_def *cs_fetch_texel(struct cs_shader *s, nir_def *coords, unsigned sampler)
{
   nir_builder *b = &s->b;
   nir_component_mask_t mask = s->array ? 0x7 : 0x3;
   nir_deref_instr *deref = nir_build_deref_var(b, s->samplers[sampler]);
   return nir_tex_deref(b, deref, deref, nir_channels(b, coords, mask));
}
279
/*
 * imageLoad(image, pos.xy)
 *
 * NOTE(review): the image variable is declared as a non-array 2D image in
 * cs_create_shader() while the dim/array indices here follow s->array —
 * presumably accepted by the drivers consuming this; verify intended.
 */
static inline nir_def *cs_image_load(struct cs_shader *s, nir_def *pos)
{
   nir_builder *b = &s->b;
   nir_def *zero = nir_imm_int(b, 0);
   nir_def *sample = nir_imm_int(b, 0);
   /* image intrinsics take a vec4 coordinate; pad unused components with 0 */
   pos = nir_pad_vector_imm_int(b, pos, 0, 4);
   enum glsl_sampler_dim sampler_dim = s->array ? GLSL_SAMPLER_DIM_2D : GLSL_SAMPLER_DIM_RECT;
   return nir_image_deref_load(b, 4, 32, &nir_build_deref_var(b, s->image)->def, pos, sample, zero,
                               .image_dim = sampler_dim,
                               .image_array = s->array);
}
294
/* imageStore(image, pos.xy, color) */
static inline void cs_image_store(struct cs_shader *s, nir_def *pos, nir_def *color)
{
   nir_builder *b = &s->b;
   nir_def *lod = nir_imm_int(b, 0);
   nir_def *sample = nir_undef(b, 1, 32);

   /* image intrinsics take a vec4 coordinate; pad unused components with 0 */
   pos = nir_pad_vector_imm_int(b, pos, 0, 4);
   nir_image_deref_store(b, &nir_build_deref_var(b, s->image)->def, pos, sample, color, lod);
}
306
/*
 * Convert integer destination pixel coordinates into cropped, clamped
 * source texel coordinates for either the luma or a chroma plane,
 * according to flags (enum coords_flags).
 */
static nir_def *cs_tex_coords(struct cs_shader *s, nir_def *coords, unsigned flags)
{
   nir_builder *b = &s->b;

   coords = nir_u2f32(b, coords);
   /* sample at texel centers */
   coords = cs_texture_offset(s, coords);

   if (flags & COORDS_CHROMA_OFFSET)
      coords = cs_chroma_offset(s, coords, flags);

   if (flags & COORDS_CHROMA)
      coords = cs_chroma_subsampling(s, coords);

   coords = cs_scale(s, coords);
   coords = cs_crop(s, coords, flags);
   coords = cs_clamp(s, coords, flags);

   return coords;
}
326
/*
 * Shader converting a progressive YUV video buffer to the RGBA target:
 * fetches Y/U/V, derives alpha by luma keying and applies the CSC matrix.
 */
static void *create_video_buffer_shader(struct vl_compositor *c)
{
   struct cs_shader s = {
      .name = "video_buffer",
      .num_samplers = 3,
   };
   nir_builder *b = &s.b;

   nir_def *ipos = cs_create_shader(c, &s);
   /* luma coords for sampler 0, chroma coords for samplers 1 and 2 */
   nir_def *pos[2] = {
      cs_tex_coords(&s, ipos, COORDS_LUMA),
      cs_tex_coords(&s, ipos, COORDS_CHROMA | COORDS_CHROMA_OFFSET),
   };

   nir_def *col[3];
   for (unsigned i = 0; i < 3; ++i)
      col[i] = cs_fetch_texel(&s, pos[MIN2(i, 1)], i);

   /* NOTE(review): keying is applied to col[2] here, matching the weave
    * shader's channel 2 — confirm this is the intended keying channel. */
   nir_def *alpha = cs_luma_key(&s, col[2]);

   nir_def *color = nir_vec4(b, col[0], col[1], col[2], s.fone);
   for (unsigned i = 0; i < 3; ++i)
      col[i] = cs_color_space_conversion(&s, color, i);

   color = nir_vec4(b, col[0], col[1], col[2], alpha);
   cs_image_store(&s, cs_translate(&s, ipos), color);

   return cs_create_shader_state(c, &s);
}
356
create_yuv_progressive_shader(struct vl_compositor * c,enum vl_compositor_plane plane)357 static void *create_yuv_progressive_shader(struct vl_compositor *c, enum vl_compositor_plane plane)
358 {
359 struct cs_shader s = {
360 .name = "yuv_progressive",
361 .num_samplers = 3,
362 };
363 nir_builder *b = &s.b;
364
365 nir_def *ipos = cs_create_shader(c, &s);
366 nir_def *pos = cs_tex_coords(&s, ipos, plane == VL_COMPOSITOR_PLANE_Y ? COORDS_LUMA : COORDS_CHROMA);
367
368 nir_def *color;
369 if (plane != VL_COMPOSITOR_PLANE_UV) {
370 unsigned c = 0;
371 if (plane == VL_COMPOSITOR_PLANE_U)
372 c = 1;
373 else if (plane == VL_COMPOSITOR_PLANE_V)
374 c = 2;
375 color = nir_channel(b, cs_fetch_texel(&s, pos, c), c);
376 } else {
377 nir_def *col1 = cs_fetch_texel(&s, pos, 1);
378 nir_def *col2 = cs_fetch_texel(&s, pos, 2);
379 color = nir_vec2(b, col1, col2);
380 }
381
382 cs_image_store(&s, cs_translate(&s, ipos), color);
383
384 return cs_create_shader_state(c, &s);
385 }
386
create_rgb_yuv_shader(struct vl_compositor * c,enum vl_compositor_plane plane)387 static void *create_rgb_yuv_shader(struct vl_compositor *c, enum vl_compositor_plane plane)
388 {
389 struct cs_shader s = {
390 .name = "rgb_yuv",
391 .num_samplers = 1,
392 };
393 nir_builder *b = &s.b;
394
395 nir_def *ipos = cs_create_shader(c, &s);
396 nir_def *color = NULL;
397
398 if (plane == VL_COMPOSITOR_PLANE_Y) {
399 nir_def *pos = cs_tex_coords(&s, ipos, COORDS_LUMA);
400 color = cs_fetch_texel(&s, pos, 0);
401 } else {
402 /*
403 vec2 pos[4];
404 pos[0] = vec2(ipos);
405 pos[0] = cs_texture_offset(pos[0]);
406 pos[0] = cs_chroma_offset(pos[0], COORDS_LUMA);
407
408 // Sample offset
409 pos[3] = pos[0] + vec2( 0.25, -0.25);
410 pos[2] = pos[0] + vec2(-0.25, 0.25);
411 pos[1] = pos[0] + vec2(-0.25, -0.25);
412 pos[0] = pos[0] + vec2( 0.25, 0.25);
413
414 vec4 col[4];
415 for (uint i = 0; i < 4; ++i) {
416 pos[i] = cs_scale(pos[i]);
417 pos[i] = cs_crop(pos[i], COORDS_LUMA);
418 pos[i] = cs_clamp(pos[i], COORDS_LUMA);
419 col[i] = texture(samp[0], pos[i]);
420 }
421 color = (col[0] + col[1] + col[2] + col[3]) * 0.25;
422 */
423 nir_def *pos[4];
424 pos[0] = nir_u2f32(b, ipos);
425 pos[0] = cs_texture_offset(&s, pos[0]);
426 pos[0] = cs_chroma_offset(&s, pos[0], COORDS_LUMA);
427
428 /* Sample offset */
429 nir_def *o_plus = nir_imm_float(b, 0.25f);
430 nir_def *o_minus = nir_imm_float(b, -0.25f);
431 pos[3] = nir_fadd(b, pos[0], nir_vec2(b, o_plus, o_minus));
432 pos[2] = nir_fadd(b, pos[0], nir_vec2(b, o_minus, o_plus));
433 pos[1] = nir_fadd(b, pos[0], nir_vec2(b, o_minus, o_minus));
434 pos[0] = nir_fadd(b, pos[0], nir_vec2(b, o_plus, o_plus));
435
436 for (unsigned i = 0; i < 4; ++i) {
437 pos[i] = cs_scale(&s, pos[i]);
438 pos[i] = cs_crop(&s, pos[i], COORDS_LUMA);
439 pos[i] = cs_clamp(&s, pos[i], COORDS_LUMA);
440
441 nir_def *c = cs_fetch_texel(&s, pos[i], 0);
442 color = color ? nir_fadd(b, color, c) : c;
443 }
444 color = nir_fmul_imm(b, color, 0.25f);
445 }
446
447 color = nir_vector_insert_imm(b, color, s.fone, 3);
448
449 if (plane != VL_COMPOSITOR_PLANE_UV) {
450 unsigned c = 0;
451 if (plane == VL_COMPOSITOR_PLANE_U)
452 c = 1;
453 else if (plane == VL_COMPOSITOR_PLANE_V)
454 c = 2;
455 color = cs_color_space_conversion(&s, color, c);
456 } else {
457 nir_def *col1 = cs_color_space_conversion(&s, color, 1);
458 nir_def *col2 = cs_color_space_conversion(&s, color, 2);
459 color = nir_vec2(b, col1, col2);
460 }
461
462 cs_image_store(&s, cs_translate(&s, ipos), color);
463
464 return cs_create_shader_state(c, &s);
465 }
466
/*
 * Deinterlacing shader weaving two fields (array layer 0 = top field,
 * layer 1 = bottom field) into a progressive output line, blending the
 * fields based on which one the output line belongs to. Output is RGB
 * (CSC + luma keying), the Y plane or the UV plane.
 *
 * Returns the compute state object (return type fixed from nir_def * to
 * void * to match the other shader creators and the fields it is stored in).
 */
static void *create_weave_shader(struct vl_compositor *c, bool rgb, bool y)
{
   struct cs_shader s = {
      .name = rgb ? "weave" : y ? "yuv_weave_y" : "yuv_weave_uv",
      .array = true,
      .num_samplers = 3,
   };
   nir_builder *b = &s.b;

   nir_def *ipos = cs_create_shader(c, &s);

   /*
   vec2 top_y = cs_texture_offset(vec2(ipos));
   vec2 top_uv = rgb ? cs_chroma_offset(top_y, COORDS_CHROMA) : top_y;
   top_uv = cs_chroma_subsampling(top_uv);
   vec2 down_y = top_y;
   vec2 down_uv = top_uv;

   top_y = cs_crop(cs_scale(top_y), COORDS_LUMA);
   top_uv = cs_crop(cs_scale(top_uv), COORDS_CHROMA);
   down_y = cs_crop(cs_scale(down_y), COORDS_LUMA);
   down_uv = cs_crop(cs_scale(down_uv), COORDS_CHROMA);

   // Weave offset
   top_y = top_y + vec2(0.0, 0.25);
   top_uv = top_uv + vec2(0.0, 0.25);
   down_y = down_y + vec2(0.0, -0.25);
   down_uv = down_uv + vec2(0.0, -0.25);

   // Texture layer
   vec3 tex_layer = vec3(top_y.y, top_uv.y, top_uv.y);
   tex_layer = tex_layer + round(tex_layer) * -1.0;
   tex_layer = abs(tex_layer) * 2.0;

   top_y = cs_clamp(top_y, COORDS_LUMA);
   top_y = cs_normalize(top_y, COORDS_LUMA);
   top_uv = cs_clamp(top_uv, COORDS_CHROMA);
   top_uv = cs_normalize(top_uv, COORDS_CHROMA);
   down_y = cs_clamp(down_y, COORDS_LUMA);
   down_y = cs_normalize(down_y, COORDS_LUMA);
   down_uv = cs_clamp(down_uv, COORDS_CHROMA);
   down_uv = cs_normalize(down_uv, COORDS_CHROMA);

   vec4 top_col, down_col;
   top_col.x = texture(samp[0], vec3(top_y, 0.0)).x;
   top_col.y = texture(samp[1], vec3(top_uv, 0.0)).x;
   top_col.z = texture(samp[2], vec3(top_uv, 0.0)).x;
   top_col.w = 1.0;
   down_col.x = texture(samp[0], vec3(down_y, 1.0)).x;
   down_col.y = texture(samp[1], vec3(down_uv, 1.0)).x;
   down_col.z = texture(samp[2], vec3(down_uv, 1.0)).x;
   down_col.w = 1.0;

   vec4 color = mix(down_col, top_col, tex_layer);
   */
   nir_def *pos[4];
   /* Top Y */
   pos[0] = nir_u2f32(b, ipos);
   pos[0] = cs_texture_offset(&s, pos[0]);
   /* Top UV */
   pos[1] = rgb ? cs_chroma_offset(&s, pos[0], COORDS_CHROMA) : pos[0];
   pos[1] = cs_chroma_subsampling(&s, pos[1]);
   /* Down Y */
   pos[2] = pos[0];
   /* Down UV */
   pos[3] = pos[1];

   /* Weave offset */
   nir_def *o_plus = nir_imm_vec2(b, 0.0f, 0.25f);
   nir_def *o_minus = nir_imm_vec2(b, 0.0f, -0.25f);
   for (unsigned i = 0; i < 4; ++i) {
      pos[i] = cs_scale(&s, pos[i]);
      pos[i] = cs_crop(&s, pos[i], i % 2 ? COORDS_CHROMA : COORDS_LUMA);
      pos[i] = nir_fadd(b, pos[i], i < 2 ? o_plus : o_minus);
   }

   /* Texture layer: fractional distance of the y coordinate to the nearest
    * line, used as the per-channel blend factor between the fields. */
   nir_def *tex_layer = nir_vec3(b,
                                 nir_channel(b, pos[0], 1),
                                 nir_channel(b, pos[1], 1),
                                 nir_channel(b, pos[1], 1));
   tex_layer = nir_fadd(b, tex_layer,
                        nir_fneg(b, nir_fround_even(b, tex_layer)));
   tex_layer = nir_fabs(b, tex_layer);
   tex_layer = nir_fmul_imm(b, tex_layer, 2.0f);

   nir_def *col[6];
   for (unsigned i = 0; i < 4; ++i) {
      bool top = i < 2;
      unsigned j = top ? 0 : 3;
      unsigned flags = i % 2 ? COORDS_CHROMA : COORDS_LUMA;
      pos[i] = cs_clamp(&s, pos[i], flags);
      pos[i] = cs_normalize(&s, pos[i], flags);
      /* select the array layer: 0 = top field, 1 = bottom field */
      pos[i] = nir_vector_insert_imm(b, pos[i],
                                     top ? s.fzero : s.fone, 2);
      if (flags == COORDS_LUMA) {
         col[j] = cs_fetch_texel(&s, pos[i], 0);
      } else {
         col[j + 1] = cs_fetch_texel(&s, pos[i], 1);
         col[j + 2] = cs_fetch_texel(&s, pos[i], 2);
      }
   }

   nir_def *color_top = nir_vec4(b, col[0], col[1], col[2], s.fone);
   nir_def *color_down = nir_vec4(b, col[3], col[4], col[5], s.fone);
   nir_def *color = nir_flrp(b, color_down, color_top, tex_layer);

   if (rgb) {
      nir_def *alpha = cs_luma_key(&s, nir_channel(b, color, 2));
      for (unsigned i = 0; i < 3; ++i)
         col[i] = cs_color_space_conversion(&s, color, i);
      color = nir_vec4(b, col[0], col[1], col[2], alpha);
   } else if (y) {
      color = nir_channel(b, color, 0);
   } else {
      nir_def *col1 = nir_channel(b, color, 1);
      nir_def *col2 = nir_channel(b, color, 2);
      color = nir_vec2(b, col1, col2);
   }

   cs_image_store(&s, cs_translate(&s, ipos), color);

   return cs_create_shader_state(c, &s);
}
591
/*
 * Shader blending an RGBA layer over the current target image contents
 * using the layer's alpha channel; the result alpha is forced to 1.
 */
static void *create_rgba_shader(struct vl_compositor *c)
{
   struct cs_shader s = {
      .name = "rgba",
      .num_samplers = 1,
   };
   nir_builder *b = &s.b;

   nir_def *ipos = cs_create_shader(c, &s);
   nir_def *pos = cs_tex_coords(&s, ipos, COORDS_LUMA);
   nir_def *pos_out = cs_translate(&s, ipos);

   nir_def *col = cs_fetch_texel(&s, pos, 0);
   /* read back the current destination pixel for alpha blending */
   nir_def *blend = cs_image_load(&s, pos_out);

   /* color = mix(dst, src, src.a), alpha forced to 1 */
   nir_def *color = nir_flrp(b, blend, col, nir_channel(b, col, 3));
   color = nir_vector_insert_imm(b, color, s.fone, 3);

   cs_image_store(&s, pos_out, color);

   return cs_create_shader_state(c, &s);
}
614
615 static void
cs_launch(struct vl_compositor * c,void * cs,const struct u_rect * draw_area)616 cs_launch(struct vl_compositor *c,
617 void *cs,
618 const struct u_rect *draw_area)
619 {
620 struct pipe_context *ctx = c->pipe;
621 unsigned width, height;
622
623 width = draw_area->x1 - draw_area->x0;
624 height = draw_area->y1 - draw_area->y0;
625
626 /* Bind the image */
627 struct pipe_image_view image = {0};
628 image.resource = c->fb_state.cbufs[0]->texture;
629 image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE;
630 image.format = c->fb_state.cbufs[0]->texture->format;
631
632 ctx->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, 0, &image);
633
634 /* Bind compute shader */
635 ctx->bind_compute_state(ctx, cs);
636
637 /* Dispatch compute */
638 struct pipe_grid_info info = {0};
639 info.block[0] = 8;
640 info.last_block[0] = width % info.block[0];
641 info.block[1] = 8;
642 info.last_block[1] = height % info.block[1];
643 info.block[2] = 1;
644 info.grid[0] = DIV_ROUND_UP(width, info.block[0]);
645 info.grid[1] = DIV_ROUND_UP(height, info.block[1]);
646 info.grid[2] = 1;
647
648 ctx->launch_grid(ctx, &info);
649
650 /* Make the result visible to all clients. */
651 ctx->memory_barrier(ctx, PIPE_BARRIER_ALL);
652
653 }
654
655 static inline struct u_rect
calc_drawn_area(struct vl_compositor_state * s,struct vl_compositor_layer * layer)656 calc_drawn_area(struct vl_compositor_state *s,
657 struct vl_compositor_layer *layer)
658 {
659 struct vertex2f tl, br;
660 struct u_rect result;
661
662 assert(s && layer);
663
664 tl = layer->dst.tl;
665 br = layer->dst.br;
666
667 /* Scale */
668 result.x0 = tl.x * layer->viewport.scale[0] + layer->viewport.translate[0];
669 result.y0 = tl.y * layer->viewport.scale[1] + layer->viewport.translate[1];
670 result.x1 = br.x * layer->viewport.scale[0] + layer->viewport.translate[0];
671 result.y1 = br.y * layer->viewport.scale[1] + layer->viewport.translate[1];
672
673 /* Clip */
674 result.x0 = MAX2(result.x0, s->scissor.minx);
675 result.y0 = MAX2(result.y0, s->scissor.miny);
676 result.x1 = MIN2(result.x1, s->scissor.maxx);
677 result.y1 = MIN2(result.y1, s->scissor.maxy);
678 return result;
679 }
680
681 static inline float
chroma_offset_x(unsigned location)682 chroma_offset_x(unsigned location)
683 {
684 if (location & VL_COMPOSITOR_LOCATION_HORIZONTAL_LEFT)
685 return 0.5f;
686 else
687 return 0.0f;
688 }
689
690 static inline float
chroma_offset_y(unsigned location)691 chroma_offset_y(unsigned location)
692 {
693 if (location & VL_COMPOSITOR_LOCATION_VERTICAL_TOP)
694 return 0.5f;
695 else if (location & VL_COMPOSITOR_LOCATION_VERTICAL_BOTTOM)
696 return -0.5f;
697 else
698 return 0.0f;
699 }
700
/*
 * Upload the per-draw parameters into the s->shader_params constant
 * buffer, following the std140 UBO layout documented in
 * cs_create_shader() (csc matrix, luma bounds, scale, crop, translate,
 * sampler dimensions, subsample ratio, clamps, chroma offset — in that
 * exact order).
 *
 * Returns false if the buffer could not be mapped.
 */
static bool
set_viewport(struct vl_compositor_state *s,
             struct cs_viewport *drawn,
             struct pipe_sampler_view **samplers)
{
   struct pipe_transfer *buf_transfer;

   assert(s && drawn);

   void *ptr = pipe_buffer_map(s->pipe, s->shader_params,
                               PIPE_MAP_WRITE | PIPE_MAP_DISCARD_WHOLE_RESOURCE,
                               &buf_transfer);

   if (!ptr)
      return false;

   /* params[0-2]: color space conversion matrix */
   memcpy(ptr, &s->csc_matrix, sizeof(vl_csc_matrix));

   /* params[3]: luma keying bounds and scale */
   float *ptr_float = (float *)ptr;
   ptr_float += sizeof(vl_csc_matrix) / sizeof(float);
   *ptr_float++ = s->luma_min;
   *ptr_float++ = s->luma_max;
   *ptr_float++ = drawn->scale_x;
   *ptr_float++ = drawn->scale_y;

   /* params[4]: crop (float) followed by translate (int) */
   *ptr_float++ = drawn->crop_x;
   *ptr_float++ = drawn->crop_y;

   int *ptr_int = (int *)ptr_float;
   *ptr_int++ = drawn->translate_x;
   *ptr_int++ = drawn->translate_y;

   /* params[5]: sampler 0 dimensions and chroma subsample ratio */
   ptr_float = (float *)ptr_int;
   *ptr_float++ = drawn->sampler0_w;
   *ptr_float++ = drawn->sampler0_h;

   /* compute_shader_video_buffer uses pixel coordinates based on the
    * Y sampler dimensions. If U/V are using separate planes and are
    * subsampled, we need to scale the coordinates */
   if (samplers[1]) {
      float h_ratio = samplers[1]->texture->width0 /
                      (float) samplers[0]->texture->width0;
      *ptr_float++ = h_ratio;
      float v_ratio = samplers[1]->texture->height0 /
                      (float) samplers[0]->texture->height0;
      *ptr_float++ = v_ratio;
   }
   else {
      *ptr_float++ = 1.0f;
      *ptr_float++ = 1.0f;
   }

   /* params[6-7]: coordinate clamps and chroma siting offset */
   *ptr_float++ = drawn->clamp_x;
   *ptr_float++ = drawn->clamp_y;
   *ptr_float++ = drawn->chroma_clamp_x;
   *ptr_float++ = drawn->chroma_clamp_y;
   *ptr_float++ = drawn->chroma_offset_x;
   *ptr_float++ = drawn->chroma_offset_y;

   pipe_buffer_unmap(s->pipe, buf_transfer);

   return true;
}
764
765 static void
draw_layers(struct vl_compositor * c,struct vl_compositor_state * s,struct u_rect * dirty)766 draw_layers(struct vl_compositor *c,
767 struct vl_compositor_state *s,
768 struct u_rect *dirty)
769 {
770 unsigned i;
771
772 assert(c);
773
774 for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
775 if (s->used_layers & (1 << i)) {
776 struct vl_compositor_layer *layer = &s->layers[i];
777 struct pipe_sampler_view **samplers = &layer->sampler_views[0];
778 unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
779 struct pipe_sampler_view *sampler1 = samplers[1] ? samplers[1] : samplers[0];
780 struct cs_viewport drawn;
781
782 drawn.area = calc_drawn_area(s, layer);
783 drawn.scale_x = layer->viewport.scale[0] /
784 ((float)layer->sampler_views[0]->texture->width0 *
785 (layer->src.br.x - layer->src.tl.x));
786 drawn.scale_y = layer->viewport.scale[1] /
787 ((float)layer->sampler_views[0]->texture->height0 *
788 (layer->src.br.y - layer->src.tl.y));
789 drawn.crop_x = layer->src.tl.x * layer->sampler_views[0]->texture->width0;
790 drawn.translate_x = layer->viewport.translate[0];
791 drawn.crop_y = layer->src.tl.y * layer->sampler_views[0]->texture->height0;
792 drawn.translate_y = layer->viewport.translate[1];
793 drawn.sampler0_w = (float)layer->sampler_views[0]->texture->width0;
794 drawn.sampler0_h = (float)layer->sampler_views[0]->texture->height0;
795 drawn.clamp_x = (float)samplers[0]->texture->width0 * layer->src.br.x - 0.5;
796 drawn.clamp_y = (float)samplers[0]->texture->height0 * layer->src.br.y - 0.5;
797 drawn.chroma_clamp_x = (float)sampler1->texture->width0 * layer->src.br.x - 0.5;
798 drawn.chroma_clamp_y = (float)sampler1->texture->height0 * layer->src.br.y - 0.5;
799 drawn.chroma_offset_x = chroma_offset_x(s->chroma_location);
800 drawn.chroma_offset_y = chroma_offset_y(s->chroma_location);
801 set_viewport(s, &drawn, samplers);
802
803 c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
804 num_sampler_views, layer->samplers);
805 c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0,
806 num_sampler_views, 0, false, samplers);
807
808 cs_launch(c, layer->cs, &(drawn.area));
809
810 /* Unbind. */
811 c->pipe->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 0, 1, NULL);
812 c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, false, NULL);
813 c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0, 0,
814 num_sampler_views, false, NULL);
815 c->pipe->bind_compute_state(c->pipe, NULL);
816 c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
817 num_sampler_views, NULL);
818
819 if (dirty) {
820 struct u_rect drawn = calc_drawn_area(s, layer);
821 dirty->x0 = MIN2(drawn.x0, dirty->x0);
822 dirty->y0 = MIN2(drawn.y0, dirty->y0);
823 dirty->x1 = MAX2(drawn.x1, dirty->x1);
824 dirty->y1 = MAX2(drawn.y1, dirty->y1);
825 }
826 }
827 }
828 }
829
/*
 * Render all used layers of the compositor state onto dst_surface with
 * the compute shader path. When clear_dirty is set and dirty_area is
 * non-empty, the whole surface is cleared first and the dirty area reset;
 * draw_layers() then grows dirty_area back to cover what was drawn.
 */
void
vl_compositor_cs_render(struct vl_compositor_state *s,
                        struct vl_compositor *c,
                        struct pipe_surface *dst_surface,
                        struct u_rect *dirty_area,
                        bool clear_dirty)
{
   assert(c && s);
   assert(dst_surface);

   c->fb_state.width = dst_surface->width;
   c->fb_state.height = dst_surface->height;
   c->fb_state.cbufs[0] = dst_surface;

   /* without a user scissor, clip to the whole surface */
   if (!s->scissor_valid) {
      s->scissor.minx = 0;
      s->scissor.miny = 0;
      s->scissor.maxx = dst_surface->width;
      s->scissor.maxy = dst_surface->height;
   }

   if (clear_dirty && dirty_area &&
       (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) {

      c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color,
                                   0, 0, dst_surface->width, dst_surface->height, false);
      /* mark the dirty area as empty (min > max) */
      dirty_area->x0 = dirty_area->y0 = VL_COMPOSITOR_MAX_DIRTY;
      dirty_area->x1 = dirty_area->y1 = VL_COMPOSITOR_MIN_DIRTY;
   }

   pipe_set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, s->shader_params);

   draw_layers(c, s, dirty_area);
}
864
/*
 * Create all compute shaders used by the compositor.
 *
 * Returns false as soon as one shader fails to build; shaders created up
 * to that point are left in place (vl_compositor_cs_cleanup_shaders()
 * only deletes non-NULL entries, so it is safe to call afterwards).
 */
bool vl_compositor_cs_init_shaders(struct vl_compositor *c)
{
   assert(c);

   c->cs_video_buffer = create_video_buffer_shader(c);
   if (!c->cs_video_buffer) {
      debug_printf("Unable to create video_buffer compute shader.\n");
      return false;
   }

   c->cs_weave_rgb = create_weave_shader(c, true, false);
   if (!c->cs_weave_rgb) {
      debug_printf("Unable to create weave_rgb compute shader.\n");
      return false;
   }

   c->cs_rgba = create_rgba_shader(c);
   if (!c->cs_rgba) {
      debug_printf("Unable to create rgba compute shader.\n");
      return false;
   }

   /* interlaced YCbCr -> progressive YCbCr deinterlacers */
   c->cs_yuv.weave.y = create_weave_shader(c, false, true);
   c->cs_yuv.weave.uv = create_weave_shader(c, false, false);
   /* progressive YCbCr plane copies */
   c->cs_yuv.progressive.y = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_Y);
   c->cs_yuv.progressive.uv = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_UV);
   c->cs_yuv.progressive.u = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_U);
   c->cs_yuv.progressive.v = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_V);
   if (!c->cs_yuv.weave.y || !c->cs_yuv.weave.uv) {
      debug_printf("Unable to create YCbCr i-to-YCbCr p deint compute shader.\n");
      return false;
   }
   if (!c->cs_yuv.progressive.y || !c->cs_yuv.progressive.uv ||
       !c->cs_yuv.progressive.u || !c->cs_yuv.progressive.v) {
      debug_printf("Unable to create YCbCr p-to-NV12 compute shader.\n");
      return false;
   }

   /* RGB -> YCbCr plane converters */
   c->cs_rgb_yuv.y = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_Y);
   c->cs_rgb_yuv.uv = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_UV);
   c->cs_rgb_yuv.u = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_U);
   c->cs_rgb_yuv.v = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_V);
   if (!c->cs_rgb_yuv.y || !c->cs_rgb_yuv.uv ||
       !c->cs_rgb_yuv.u || !c->cs_rgb_yuv.v) {
      debug_printf("Unable to create RGB-to-NV12 compute shader.\n");
      return false;
   }

   return true;
}
915
vl_compositor_cs_cleanup_shaders(struct vl_compositor * c)916 void vl_compositor_cs_cleanup_shaders(struct vl_compositor *c)
917 {
918 assert(c);
919
920 if (c->cs_video_buffer)
921 c->pipe->delete_compute_state(c->pipe, c->cs_video_buffer);
922 if (c->cs_weave_rgb)
923 c->pipe->delete_compute_state(c->pipe, c->cs_weave_rgb);
924 if (c->cs_rgba)
925 c->pipe->delete_compute_state(c->pipe, c->cs_rgba);
926 if (c->cs_yuv.weave.y)
927 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.y);
928 if (c->cs_yuv.weave.uv)
929 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.uv);
930 if (c->cs_yuv.progressive.y)
931 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.y);
932 if (c->cs_yuv.progressive.uv)
933 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.uv);
934 if (c->cs_yuv.progressive.u)
935 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.u);
936 if (c->cs_yuv.progressive.v)
937 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.v);
938 if (c->cs_rgb_yuv.y)
939 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.y);
940 if (c->cs_rgb_yuv.uv)
941 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.uv);
942 if (c->cs_rgb_yuv.u)
943 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.u);
944 if (c->cs_rgb_yuv.v)
945 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.v);
946 }
947