1 /**************************************************************************
2 *
3 * Copyright 2019 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
 * Authors: James Zhu <james.zhu@amd.com>
27 *
28 **************************************************************************/
29
30 #include <assert.h>
31
32 #include "nir/nir_builder.h"
33 #include "vl_compositor_cs.h"
34
/* Per-draw parameters uploaded into the shader UBO by set_viewport();
 * the packing there must match the "ubo" layout documented in
 * cs_create_shader(). */
struct cs_viewport {
   struct u_rect area;      /* destination rectangle actually drawn */
   int translate_x; /* dst */
   int translate_y;
   float sampler0_w;        /* dimensions of the plane-0 (luma) texture */
   float sampler0_h;
   float clamp_x;           /* max texel coordinate for the luma plane */
   float clamp_y;
   float chroma_clamp_x;    /* max texel coordinate for the chroma plane(s) */
   float chroma_clamp_y;
   float chroma_offset_x;   /* chroma siting offset, see chroma_offset_x/y() */
   float chroma_offset_y;
   float proj[2][4];        /* 2x4 projection matrix for luma coordinates */
   float chroma_proj[2][4]; /* 2x4 projection matrix for chroma coordinates */
};
50
/* State shared by the compute-shader builder helpers below while a
 * shader is being emitted. */
struct cs_shader {
   nir_builder b;             /* builder the shader is emitted into */
   const char *name;          /* shader name (debugging) */
   bool array;                /* true: sampler2DArray, false: sampler2DRect */
   unsigned num_samplers;     /* number of entries used in samplers[] */
   nir_variable *samplers[3]; /* one sampler per plane */
   nir_variable *image;       /* destination image (binding 0) */
   nir_def *params[11];       /* the UBO vec4s, preloaded by cs_create_shader() */
   nir_def *fone;             /* cached 1.0f immediate */
   nir_def *fzero;            /* cached 0.0f immediate */
};
62
/* Flags controlling how texture coordinates are derived in
 * cs_tex_coords() and friends. */
enum coords_flags {
   COORDS_LUMA = 0x0,          /* address the luma plane */
   COORDS_CHROMA = 0x1,        /* apply chroma subsampling and chroma clamp/proj */
   COORDS_CHROMA_OFFSET = 0x2, /* apply the chroma siting offset */
};
68
/**
 * Build the common shell of a compositor compute shader: 8x8x1
 * workgroup, s->num_samplers plane samplers, one destination image and
 * a UBO whose vec4 rows are preloaded into s->params.
 *
 * Returns the global invocation id, i.e. the pixel position this
 * invocation works on.
 */
static nir_def *cs_create_shader(struct vl_compositor *c, struct cs_shader *s)
{
   /*
   #version 450

   layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
   layout (binding = 0) uniform sampler2DRect samplers[3]; // or sampler2DArray
   layout (binding = 0) uniform image2D image;

   layout (std140, binding = 0) uniform ubo
   {
      vec4 csc_mat[3];        // params[0-2]
      float luma_min;         // params[3].x
      float luma_max;         // params[3].y
      vec2 chroma_offset;     // params[3].zw
      ivec2 translate;        // params[4].zw
      vec2 sampler0_wh;       // params[5].xy
      vec2 subsample_ratio;   // params[5].zw
      vec2 coord_clamp;       // params[6].xy
      vec2 chroma_clamp;      // params[6].zw
      vec4 proj[3];           // params[7-8]
      vec4 chroma_proj[3];    // params[9-10]
   };

   void main()
   {
      ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
   }
   */
   enum glsl_sampler_dim sampler_dim = s->array ? GLSL_SAMPLER_DIM_2D : GLSL_SAMPLER_DIM_RECT;
   const struct glsl_type *sampler_type =
      glsl_sampler_type(sampler_dim, /*is_shadow*/ false, s->array, GLSL_TYPE_FLOAT);
   const struct glsl_type *image_type =
      glsl_image_type(GLSL_SAMPLER_DIM_2D, /*is_array*/ false, GLSL_TYPE_FLOAT);
   const nir_shader_compiler_options *options =
      c->pipe->screen->get_compiler_options(c->pipe->screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);

   s->b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "vl:%s", s->name);
   nir_builder *b = &s->b;
   b->shader->info.workgroup_size[0] = 8;
   b->shader->info.workgroup_size[1] = 8;
   b->shader->info.workgroup_size[2] = 1;
   b->shader->info.num_ubos = 1;
   b->shader->num_uniforms = ARRAY_SIZE(s->params);

   /* Preload every UBO row; std140 vec4 rows are 16 bytes apart. */
   nir_def *zero = nir_imm_int(b, 0);
   for (unsigned i = 0; i < b->shader->num_uniforms; ++i)
      s->params[i] = nir_load_ubo(b, 4, 32, zero, nir_imm_int(b, i * 16), .align_mul = 4, .range = ~0);

   for (unsigned i = 0; i < s->num_samplers; ++i) {
      s->samplers[i] = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "sampler");
      s->samplers[i]->data.binding = i;
      BITSET_SET(b->shader->info.textures_used, i);
      BITSET_SET(b->shader->info.samplers_used, i);
   }

   s->image = nir_variable_create(b->shader, nir_var_image, image_type, "image");
   s->image->data.binding = 0;
   BITSET_SET(b->shader->info.images_used, 0);

   s->fone = nir_imm_float(b, 1.0f);
   s->fzero = nir_imm_float(b, 0.0f);

   /* gl_GlobalInvocationID = workgroup_id * workgroup_size + local_id */
   nir_def *block_ids = nir_load_workgroup_id(b);
   nir_def *local_ids = nir_load_local_invocation_id(b);
   return nir_iadd(b, nir_imul(b, block_ids, nir_imm_ivec3(b, 8, 8, 1)), local_ids);
}
136
cs_create_shader_state(struct vl_compositor * c,struct cs_shader * s)137 static void *cs_create_shader_state(struct vl_compositor *c, struct cs_shader *s)
138 {
139 c->pipe->screen->finalize_nir(c->pipe->screen, s->b.shader);
140
141 struct pipe_compute_state state = {0};
142 state.ir_type = PIPE_SHADER_IR_NIR;
143 state.prog = s->b.shader;
144
145 /* create compute shader */
146 return c->pipe->create_compute_state(c->pipe, &state);
147 }
148
/* Translate src.xy by the integer destination offset stored in
 * params[4].zw:  return src.xy + params[4].zw; */
static inline nir_def *cs_translate(struct cs_shader *s, nir_def *src)
{
   nir_builder *b = &s->b;
   nir_def *offset = nir_channels(b, s->params[4], 0x3 << 2);
   return nir_iadd(b, src, offset);
}
157
/* Offset the coordinates by half a texel so we sample texel centers:
 * return src.xy + 0.5; */
static inline nir_def *cs_texture_offset(struct cs_shader *s, nir_def *src)
{
   nir_builder *b = &s->b;
   return nir_fadd_imm(b, src, 0.5f);
}
166
/* Scale coordinates by the chroma subsample ratio in params[5].zw:
 * return src.xy * params[5].zw; */
static inline nir_def *cs_chroma_subsampling(struct cs_shader *s, nir_def *src)
{
   nir_builder *b = &s->b;
   nir_def *ratio = nir_channels(b, s->params[5], 0x3 << 2);
   return nir_fmul(b, src, ratio);
}
175
/**
 * Apply the 2x4 projection matrix to the coordinates — params[7-8] for
 * luma, params[9-10] for chroma:
 *
 *    uint idx = flags & COORDS_CHROMA ? 9 : 7;
 *    src.z = 1.0;
 *    float x = dot(src.xyz, params[idx].xyz);
 *    float y = dot(src.xyz, params[idx + 1].xyz);
 *    return vec3(x, y, 0.0);
 *
 * The z component is a 0.0 placeholder (not 1.0); callers that sample
 * array textures overwrite it with the layer index afterwards (see
 * create_weave_shader).
 */
static inline nir_def *cs_proj(struct cs_shader *s, nir_def *src, unsigned flags)
{
   nir_builder *b = &s->b;
   unsigned idx = flags & COORDS_CHROMA ? 9 : 7;
   src = nir_vector_insert_imm(b, src, s->fone, 2);
   nir_def *x = nir_fdot3(b, src, s->params[idx]);
   nir_def *y = nir_fdot3(b, src, s->params[idx + 1]);
   return nir_vec3(b, x, y, s->fzero);
}
191
/* Luma keying: 1.0 when src lies outside the (luma_min, luma_max]
 * window stored in params[3].xy, 0.0 otherwise:
 *
 *    return float(params[3].x >= src || params[3].y < src);
 */
static inline nir_def *cs_luma_key(struct cs_shader *s, nir_def *src)
{
   nir_builder *b = &s->b;
   nir_def *below_min = nir_fge(b, nir_channel(b, s->params[3], 0), src);
   nir_def *above_max = nir_flt(b, nir_channel(b, s->params[3], 1), src);
   nir_def *keyed = nir_ior(b, below_min, above_max);
   return nir_b2f32(b, keyed);
}
204
/**
 * Apply the chroma siting offset from params[3].zw:
 *
 *    vec2 offset = params[3].zw;
 *    if (flags & COORDS_CHROMA)
 *       return src.xy + offset;     // chroma coords: add in chroma texels
 *    return offset * -0.5 + src.xy; // luma coords: subtract half, fused ffma
 */
static inline nir_def *cs_chroma_offset(struct cs_shader *s, nir_def *src, unsigned flags)
{
   nir_builder *b = &s->b;
   nir_def *offset = nir_channels(b, s->params[3], 0x3 << 2);
   if (flags & COORDS_CHROMA)
      return nir_fadd(b, src, offset);
   return nir_ffma_imm1(b, offset, -0.5f, src);
}
219
/* Clamp the coordinates against the maximum texel coordinate:
 * params[6].zw for chroma coordinates, params[6].xy for luma:
 *
 *    return min(src.xy, coord_max);
 */
static inline nir_def *cs_clamp(struct cs_shader *s, nir_def *src, unsigned flags)
{
   nir_builder *b = &s->b;
   nir_component_mask_t mask;

   if (flags & COORDS_CHROMA)
      mask = 0x3 << 2;
   else
      mask = 0x3;

   nir_def *coord_max = nir_channels(b, s->params[6], mask);
   return nir_fmin(b, src, coord_max);
}
234
/* Convert texel coordinates to normalized coordinates by dividing by
 * the sampler-0 dimensions in params[5].xy (chroma-subsampled when
 * COORDS_CHROMA is set); used by the array-sampler weave shaders:
 *
 *    vec2 div = params[5].xy;
 *    if (flags & COORDS_CHROMA)
 *       div = cs_chroma_subsampling(div);
 *    return src.xy / div;
 */
static inline nir_def *cs_normalize(struct cs_shader *s, nir_def *src, unsigned flags)
{
   nir_builder *b = &s->b;
   nir_def *size = nir_channels(b, s->params[5], 0x3);

   if (flags & COORDS_CHROMA)
      size = cs_chroma_subsampling(s, size);

   return nir_fdiv(b, src, size);
}
249
/**
 * One row of the color space conversion: dot(src, csc_mat[comp]),
 * where the CSC matrix rows live in params[0-2].
 */
static inline nir_def *cs_color_space_conversion(struct cs_shader *s, nir_def *src, unsigned comp)
{
   nir_builder *b = &s->b;
   return nir_fdot4(b, src, s->params[comp]);
}
258
/**
 * Sample plane `sampler`:
 *
 *    return texture(samplers[sampler], s->array ? coords.xyz : coords.xy);
 *
 * Array samplers consume a vec3 (z = layer), rect samplers a vec2.  The
 * same deref serves as both texture and sampler deref since they share
 * a binding point.
 */
static inline nir_def *cs_fetch_texel(struct cs_shader *s, nir_def *coords, unsigned sampler)
{
   nir_builder *b = &s->b;
   nir_deref_instr *tex_deref = nir_build_deref_var(b, s->samplers[sampler]);
   nir_component_mask_t mask = s->array ? 0x7 : 0x3;
   return nir_tex_deref(b, tex_deref, tex_deref, nir_channels(b, coords, mask));
}
269
/**
 * imageLoad(image, pos.xy)
 *
 * NOTE(review): the image variable is created as GLSL_SAMPLER_DIM_2D,
 * non-array in cs_create_shader(), yet the intrinsic here is tagged
 * with the *sampler* dim (RECT when !s->array) and image_array =
 * s->array — confirm this mismatch is intentional.
 */
static inline nir_def *cs_image_load(struct cs_shader *s, nir_def *pos)
{
   nir_builder *b = &s->b;
   nir_def *zero = nir_imm_int(b, 0);
   nir_def *sample = nir_imm_int(b, 0);
   /* Pad the coordinate to the 4 components the intrinsic expects. */
   pos = nir_pad_vector_imm_int(b, pos, 0, 4);
   enum glsl_sampler_dim sampler_dim = s->array ? GLSL_SAMPLER_DIM_2D : GLSL_SAMPLER_DIM_RECT;
   return nir_image_deref_load(b, 4, 32, &nir_build_deref_var(b, s->image)->def, pos, sample, zero,
                               .image_dim = sampler_dim,
                               .image_array = s->array);
}
284
/**
 * imageStore(image, pos.xy, color)
 *
 * The sample operand is undef (the destination is not multisampled);
 * the trailing zero is the lod operand.
 */
static inline void cs_image_store(struct cs_shader *s, nir_def *pos, nir_def *color)
{
   nir_builder *b = &s->b;
   nir_def *zero = nir_imm_int(b, 0);
   nir_def *undef32 = nir_undef(b, 1, 32);
   /* Pad the coordinate to the 4 components the intrinsic expects. */
   pos = nir_pad_vector_imm_int(b, pos, 0, 4);
   nir_image_deref_store(b, &nir_build_deref_var(b, s->image)->def, pos, undef32, color, zero);
}
296
/**
 * Turn the integer pixel position `coords` into texture coordinates:
 * move to the texel center, optionally apply the chroma siting offset
 * and chroma subsampling, then project into source space and clamp.
 */
static nir_def *cs_tex_coords(struct cs_shader *s, nir_def *coords, unsigned flags)
{
   nir_builder *b = &s->b;

   coords = nir_u2f32(b, coords);
   coords = cs_texture_offset(s, coords);

   if (flags & COORDS_CHROMA_OFFSET)
      coords = cs_chroma_offset(s, coords, flags);

   if (flags & COORDS_CHROMA)
      coords = cs_chroma_subsampling(s, coords);

   coords = cs_proj(s, coords, flags);
   coords = cs_clamp(s, coords, flags);

   return coords;
}
315
/**
 * Shader sampling a progressive YCbCr video buffer (one sampler per
 * component), converting to RGB through the CSC matrix and producing
 * alpha from luma keying.
 */
static void *create_video_buffer_shader(struct vl_compositor *c)
{
   struct cs_shader s = {
      .name = "video_buffer",
      .num_samplers = 3,
   };
   nir_builder *b = &s.b;

   nir_def *ipos = cs_create_shader(c, &s);
   /* Luma coordinates for sampler 0, chroma coordinates for samplers 1/2. */
   nir_def *pos[2] = {
      cs_tex_coords(&s, ipos, COORDS_LUMA),
      cs_tex_coords(&s, ipos, COORDS_CHROMA | COORDS_CHROMA_OFFSET),
   };

   nir_def *col[3];
   for (unsigned i = 0; i < 3; ++i)
      col[i] = cs_fetch_texel(&s, pos[MIN2(i, 1)], i);

   /* Alpha from luma keying, evaluated on col[2]. */
   nir_def *alpha = cs_luma_key(&s, col[2]);

   /* CSC: one dot product per output component, input alpha fixed at 1. */
   nir_def *color = nir_vec4(b, col[0], col[1], col[2], s.fone);
   for (unsigned i = 0; i < 3; ++i)
      col[i] = cs_color_space_conversion(&s, color, i);

   color = nir_vec4(b, col[0], col[1], col[2], alpha);
   cs_image_store(&s, cs_translate(&s, ipos), color);

   return cs_create_shader_state(c, &s);
}
345
create_yuv_progressive_shader(struct vl_compositor * c,enum vl_compositor_plane plane)346 static void *create_yuv_progressive_shader(struct vl_compositor *c, enum vl_compositor_plane plane)
347 {
348 struct cs_shader s = {
349 .name = "yuv_progressive",
350 .num_samplers = 3,
351 };
352 nir_builder *b = &s.b;
353
354 nir_def *ipos = cs_create_shader(c, &s);
355 nir_def *pos = cs_tex_coords(&s, ipos, plane == VL_COMPOSITOR_PLANE_Y ? COORDS_LUMA : COORDS_CHROMA);
356
357 nir_def *color;
358 if (plane != VL_COMPOSITOR_PLANE_UV) {
359 unsigned c = 0;
360 if (plane == VL_COMPOSITOR_PLANE_U)
361 c = 1;
362 else if (plane == VL_COMPOSITOR_PLANE_V)
363 c = 2;
364 color = nir_channel(b, cs_fetch_texel(&s, pos, c), c);
365 } else {
366 nir_def *col1 = cs_fetch_texel(&s, pos, 1);
367 nir_def *col2 = cs_fetch_texel(&s, pos, 2);
368 color = nir_vec2(b, col1, col2);
369 }
370
371 cs_image_store(&s, cs_translate(&s, ipos), color);
372
373 return cs_create_shader_state(c, &s);
374 }
375
create_rgb_yuv_shader(struct vl_compositor * c,enum vl_compositor_plane plane)376 static void *create_rgb_yuv_shader(struct vl_compositor *c, enum vl_compositor_plane plane)
377 {
378 struct cs_shader s = {
379 .name = "rgb_yuv",
380 .num_samplers = 1,
381 };
382 nir_builder *b = &s.b;
383
384 nir_def *ipos = cs_create_shader(c, &s);
385 nir_def *color = NULL;
386
387 if (plane == VL_COMPOSITOR_PLANE_Y) {
388 nir_def *pos = cs_tex_coords(&s, ipos, COORDS_LUMA);
389 color = cs_fetch_texel(&s, pos, 0);
390 } else {
391 /*
392 vec2 pos[4];
393 pos[0] = vec2(ipos);
394 pos[0] = cs_texture_offset(pos[0]);
395 pos[0] = cs_chroma_offset(pos[0], COORDS_LUMA);
396
397 // Sample offset
398 pos[3] = pos[0] + vec2( 0.25, -0.25);
399 pos[2] = pos[0] + vec2(-0.25, 0.25);
400 pos[1] = pos[0] + vec2(-0.25, -0.25);
401 pos[0] = pos[0] + vec2( 0.25, 0.25);
402
403 vec4 col[4];
404 for (uint i = 0; i < 4; ++i) {
405 pos[i] = cs_proj(pos[i], COORDS_LUMA);
406 pos[i] = cs_clamp(pos[i], COORDS_LUMA);
407 col[i] = texture(samp[0], pos[i]);
408 }
409 color = (col[0] + col[1] + col[2] + col[3]) * 0.25;
410 */
411 nir_def *pos[4];
412 pos[0] = nir_u2f32(b, ipos);
413 pos[0] = cs_texture_offset(&s, pos[0]);
414 pos[0] = cs_chroma_offset(&s, pos[0], COORDS_LUMA);
415
416 /* Sample offset */
417 nir_def *o_plus = nir_imm_float(b, 0.25f);
418 nir_def *o_minus = nir_imm_float(b, -0.25f);
419 pos[3] = nir_fadd(b, pos[0], nir_vec2(b, o_plus, o_minus));
420 pos[2] = nir_fadd(b, pos[0], nir_vec2(b, o_minus, o_plus));
421 pos[1] = nir_fadd(b, pos[0], nir_vec2(b, o_minus, o_minus));
422 pos[0] = nir_fadd(b, pos[0], nir_vec2(b, o_plus, o_plus));
423
424 for (unsigned i = 0; i < 4; ++i) {
425 pos[i] = cs_proj(&s, pos[i], COORDS_LUMA);
426 pos[i] = cs_clamp(&s, pos[i], COORDS_LUMA);
427
428 nir_def *c = cs_fetch_texel(&s, pos[i], 0);
429 color = color ? nir_fadd(b, color, c) : c;
430 }
431 color = nir_fmul_imm(b, color, 0.25f);
432 }
433
434 color = nir_vector_insert_imm(b, color, s.fone, 3);
435
436 if (plane != VL_COMPOSITOR_PLANE_UV) {
437 unsigned c = 0;
438 if (plane == VL_COMPOSITOR_PLANE_U)
439 c = 1;
440 else if (plane == VL_COMPOSITOR_PLANE_V)
441 c = 2;
442 color = cs_color_space_conversion(&s, color, c);
443 } else {
444 nir_def *col1 = cs_color_space_conversion(&s, color, 1);
445 nir_def *col2 = cs_color_space_conversion(&s, color, 2);
446 color = nir_vec2(b, col1, col2);
447 }
448
449 cs_image_store(&s, cs_translate(&s, ipos), color);
450
451 return cs_create_shader_state(c, &s);
452 }
453
/**
 * Shader weaving the two interlaced fields (stored as the two layers of
 * array textures) back into a progressive frame, blending between the
 * top and bottom field based on the fractional line position.
 *
 * rgb selects RGB output (with CSC and luma keying); otherwise y picks
 * the luma plane and !y the interleaved chroma plane.
 *
 * Fix: the return type was declared nir_def * although the function
 * returns the void * compute state from cs_create_shader_state(), like
 * every other create_*_shader() here; corrected to void *.
 */
static void *create_weave_shader(struct vl_compositor *c, bool rgb, bool y)
{
   struct cs_shader s = {
      .name = rgb ? "weave" : y ? "yuv_weave_y" : "yuv_weave_uv",
      .array = true,
      .num_samplers = 3,
   };
   nir_builder *b = &s.b;

   nir_def *ipos = cs_create_shader(c, &s);

   /*
   vec2 top_y = cs_texture_offset(vec2(ipos));
   vec2 top_uv = rgb ? cs_chroma_offset(top_y, COORDS_CHROMA) : top_y;
   top_uv = cs_chroma_subsampling(top_uv);
   vec2 down_y = top_y;
   vec2 down_uv = top_uv;

   top_y = cs_proj(top_y, COORDS_LUMA);
   top_uv = cs_proj(top_uv, COORDS_CHROMA);
   down_y = cs_proj(down_y, COORDS_LUMA);
   down_uv = cs_proj(down_uv, COORDS_CHROMA);

   // Weave offset
   top_y = top_y + vec2(0.0, 0.25);
   top_uv = top_uv + vec2(0.0, 0.25);
   down_y = down_y + vec2(0.0, -0.25);
   down_uv = down_uv + vec2(0.0, -0.25);

   // Texture layer
   vec3 tex_layer = vec3(top_y.y, top_uv.y, top_uv.y);
   tex_layer = tex_layer + round(tex_layer) * -1.0;
   tex_layer = abs(tex_layer) * 2.0;

   top_y = cs_clamp(top_y, COORDS_LUMA);
   top_y = cs_normalize(top_y, COORDS_LUMA);
   top_uv = cs_clamp(top_uv, COORDS_CHROMA);
   top_uv = cs_normalize(top_uv, COORDS_CHROMA);
   down_y = cs_clamp(down_y, COORDS_LUMA);
   down_y = cs_normalize(down_y, COORDS_LUMA);
   down_uv = cs_clamp(down_uv, COORDS_CHROMA);
   down_uv = cs_normalize(down_uv, COORDS_CHROMA);

   vec4 top_col, down_col;
   top_col.x = texture(samp[0], vec3(top_y, 0.0)).x;
   top_col.y = texture(samp[1], vec3(top_uv, 0.0)).x;
   top_col.z = texture(samp[2], vec3(top_uv, 0.0)).x;
   top_col.w = 1.0;
   down_col.x = texture(samp[0], vec3(down_y, 1.0)).x;
   down_col.y = texture(samp[1], vec3(down_uv, 1.0)).x;
   down_col.z = texture(samp[2], vec3(down_uv, 1.0)).x;
   down_col.w = 1.0;

   vec4 color = mix(down_col, top_col, tex_layer);
   */
   nir_def *pos[4];
   /* Top Y */
   pos[0] = nir_u2f32(b, ipos);
   pos[0] = cs_texture_offset(&s, pos[0]);
   /* Top UV */
   pos[1] = rgb ? cs_chroma_offset(&s, pos[0], COORDS_CHROMA) : pos[0];
   pos[1] = cs_chroma_subsampling(&s, pos[1]);
   /* Down Y */
   pos[2] = pos[0];
   /* Down UV */
   pos[3] = pos[1];

   /* Weave offset */
   nir_def *o_plus = nir_imm_vec2(b, 0.0f, 0.25f);
   nir_def *o_minus = nir_imm_vec2(b, 0.0f, -0.25f);
   for (unsigned i = 0; i < 4; ++i) {
      pos[i] = cs_proj(&s, pos[i], i % 2 ? COORDS_CHROMA : COORDS_LUMA);
      pos[i] = nir_fadd(b, pos[i], i < 2 ? o_plus : o_minus);
   }

   /* Texture layer: blend weight from the fractional line position. */
   nir_def *tex_layer = nir_vec3(b,
                                 nir_channel(b, pos[0], 1),
                                 nir_channel(b, pos[1], 1),
                                 nir_channel(b, pos[1], 1));
   tex_layer = nir_fadd(b, tex_layer,
                        nir_fneg(b, nir_fround_even(b, tex_layer)));
   tex_layer = nir_fabs(b, tex_layer);
   tex_layer = nir_fmul_imm(b, tex_layer, 2.0f);

   nir_def *col[6];
   for (unsigned i = 0; i < 4; ++i) {
      bool top = i < 2;
      unsigned j = top ? 0 : 3;
      unsigned flags = i % 2 ? COORDS_CHROMA : COORDS_LUMA;
      pos[i] = cs_clamp(&s, pos[i], flags);
      pos[i] = cs_normalize(&s, pos[i], flags);
      /* z selects the field: layer 0 for the top, layer 1 for the bottom. */
      pos[i] = nir_vector_insert_imm(b, pos[i],
                                     top ? s.fzero : s.fone, 2);
      if (flags == COORDS_LUMA) {
         col[j] = cs_fetch_texel(&s, pos[i], 0);
      } else {
         col[j + 1] = cs_fetch_texel(&s, pos[i], 1);
         col[j + 2] = cs_fetch_texel(&s, pos[i], 2);
      }
   }

   nir_def *color_top = nir_vec4(b, col[0], col[1], col[2], s.fone);
   nir_def *color_down = nir_vec4(b, col[3], col[4], col[5], s.fone);
   nir_def *color = nir_flrp(b, color_down, color_top, tex_layer);

   if (rgb) {
      nir_def *alpha = cs_luma_key(&s, nir_channel(b, color, 2));
      for (unsigned i = 0; i < 3; ++i)
         col[i] = cs_color_space_conversion(&s, color, i);
      color = nir_vec4(b, col[0], col[1], col[2], alpha);
   } else if (y) {
      color = nir_channel(b, color, 0);
   } else {
      nir_def *col1 = nir_channel(b, color, 1);
      nir_def *col2 = nir_channel(b, color, 2);
      color = nir_vec2(b, col1, col2);
   }

   cs_image_store(&s, cs_translate(&s, ipos), color);

   return cs_create_shader_state(c, &s);
}
577
/**
 * Shader blending an RGBA layer over the current destination content:
 * color = mix(dst, src, src.a), with the output alpha forced to 1.0.
 * This is the only shader that reads the destination image back.
 */
static void *create_rgba_shader(struct vl_compositor *c)
{
   struct cs_shader s = {
      .name = "rgba",
      .num_samplers = 1,
   };
   nir_builder *b = &s.b;

   nir_def *ipos = cs_create_shader(c, &s);
   nir_def *pos = cs_tex_coords(&s, ipos, COORDS_LUMA);
   nir_def *pos_out = cs_translate(&s, ipos);

   nir_def *col = cs_fetch_texel(&s, pos, 0);
   nir_def *blend = cs_image_load(&s, pos_out);

   /* Alpha-blend the source over the destination, then force alpha = 1. */
   nir_def *color = nir_flrp(b, blend, col, nir_channel(b, col, 3));
   color = nir_vector_insert_imm(b, color, s.fone, 3);

   cs_image_store(&s, pos_out, color);

   return cs_create_shader_state(c, &s);
}
600
601 static void
cs_launch(struct vl_compositor * c,void * cs,const struct u_rect * draw_area)602 cs_launch(struct vl_compositor *c,
603 void *cs,
604 const struct u_rect *draw_area)
605 {
606 struct pipe_context *ctx = c->pipe;
607 unsigned width, height;
608
609 width = draw_area->x1 - draw_area->x0;
610 height = draw_area->y1 - draw_area->y0;
611
612 /* Bind the image */
613 struct pipe_image_view image = {0};
614 image.resource = c->fb_state.cbufs[0]->texture;
615 image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE;
616 image.format = c->fb_state.cbufs[0]->texture->format;
617
618 ctx->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, 0, &image);
619
620 /* Bind compute shader */
621 ctx->bind_compute_state(ctx, cs);
622
623 /* Dispatch compute */
624 struct pipe_grid_info info = {0};
625 info.block[0] = 8;
626 info.last_block[0] = width % info.block[0];
627 info.block[1] = 8;
628 info.last_block[1] = height % info.block[1];
629 info.block[2] = 1;
630 info.grid[0] = DIV_ROUND_UP(width, info.block[0]);
631 info.grid[1] = DIV_ROUND_UP(height, info.block[1]);
632 info.grid[2] = 1;
633
634 ctx->launch_grid(ctx, &info);
635
636 /* Make the result visible to all clients. */
637 ctx->memory_barrier(ctx, PIPE_BARRIER_ALL);
638
639 }
640
641 static inline struct u_rect
calc_drawn_area(struct vl_compositor_state * s,struct vl_compositor_layer * layer)642 calc_drawn_area(struct vl_compositor_state *s,
643 struct vl_compositor_layer *layer)
644 {
645 struct vertex2f tl, br;
646 struct u_rect result;
647
648 assert(s && layer);
649
650 tl = layer->dst.tl;
651 br = layer->dst.br;
652
653 /* Scale */
654 result.x0 = tl.x * layer->viewport.scale[0] + layer->viewport.translate[0];
655 result.y0 = tl.y * layer->viewport.scale[1] + layer->viewport.translate[1];
656 result.x1 = br.x * layer->viewport.scale[0] + layer->viewport.translate[0];
657 result.y1 = br.y * layer->viewport.scale[1] + layer->viewport.translate[1];
658
659 /* Clip */
660 result.x0 = MAX2(result.x0, s->scissor.minx);
661 result.y0 = MAX2(result.y0, s->scissor.miny);
662 result.x1 = MIN2(result.x1, s->scissor.maxx);
663 result.y1 = MIN2(result.y1, s->scissor.maxy);
664 return result;
665 }
666
/**
 * Build the 2x4 projection matrix m mapping destination pixel
 * coordinates to source texel coordinates, honoring rotation,
 * mirroring, scaling (src rect vs viewport) and cropping (src.tl).
 *
 * `texture` is the plane being addressed; ratio_x/y account for planes
 * subsampled relative to sampler_views[0].
 */
static inline void
calc_proj(struct vl_compositor_layer *layer,
          struct pipe_resource *texture,
          float m[2][4])
{
   enum vl_compositor_mirror mirror = layer->mirror;
   float ratio_x = (float)texture->width0 / layer->sampler_views[0]->texture->width0;
   float ratio_y = (float)texture->height0 / layer->sampler_views[0]->texture->height0;
   float width = layer->sampler_views[0]->texture->width0;
   float height = layer->sampler_views[0]->texture->height0;
   float translate_x = texture->width0 * ratio_x;
   float translate_y = texture->height0 * ratio_y;

   memset(m, 0, sizeof(float) * 2 * 4);

   /* Rotation: 90/270 swap the axes (and width/height); 180 is folded
    * into the mirror state below. */
   switch (layer->rotate) {
   default:
   case VL_COMPOSITOR_ROTATE_0:
      m[0][0] = 1.0;
      m[1][1] = 1.0;
      break;
   case VL_COMPOSITOR_ROTATE_90:
      m[0][1] = 1.0;
      m[1][0] = -1.0;
      m[1][2] = translate_y;
      width = layer->sampler_views[0]->texture->height0;
      height = layer->sampler_views[0]->texture->width0;
      break;
   case VL_COMPOSITOR_ROTATE_180:
      m[0][0] = 1.0;
      m[1][1] = 1.0;
      /* 180 composed with an existing mirror flips the mirror axis
       * (180 + H = V, 180 + V = H).
       * NOTE(review): with mirror == NONE this yields only a vertical
       * flip; confirm the horizontal half of the 180 rotation is
       * handled elsewhere (or that this is intentional). */
      if (mirror != VL_COMPOSITOR_MIRROR_VERTICAL)
         mirror = VL_COMPOSITOR_MIRROR_VERTICAL;
      else
         mirror = VL_COMPOSITOR_MIRROR_HORIZONTAL;
      break;
   case VL_COMPOSITOR_ROTATE_270:
      m[0][1] = -1.0;
      m[1][0] = 1.0;
      m[0][2] = translate_x;
      width = layer->sampler_views[0]->texture->height0;
      height = layer->sampler_views[0]->texture->width0;
      break;
   }

   /* Mirroring: negate the row and translate back into range. */
   switch (mirror) {
   default:
   case VL_COMPOSITOR_MIRROR_NONE:
      break;
   case VL_COMPOSITOR_MIRROR_HORIZONTAL:
      m[0][0] *= -1;
      m[0][1] *= -1;
      m[0][2] = translate_x - m[0][2];
      break;
   case VL_COMPOSITOR_MIRROR_VERTICAL:
      m[1][0] *= -1;
      m[1][1] *= -1;
      m[1][2] = translate_y - m[1][2];
      break;
   }

   /* Scale: source rect size relative to the viewport size. */
   float scale_x = (width * (layer->src.br.x - layer->src.tl.x)) / layer->viewport.scale[0];
   float scale_y = (height * (layer->src.br.y - layer->src.tl.y)) / layer->viewport.scale[1];

   m[0][0] *= scale_x;
   m[0][1] *= scale_x;
   m[0][2] *= scale_x;
   m[1][0] *= scale_y;
   m[1][1] *= scale_y;
   m[1][2] *= scale_y;

   /* Crop: offset by the source rect's top-left, in this plane's texels. */
   float crop_x = (layer->src.tl.x * width) * ratio_x;
   float crop_y = (layer->src.tl.y * height) * ratio_y;

   m[0][2] += crop_x;
   m[1][2] += crop_y;
}
744
745 static inline float
chroma_offset_x(unsigned location)746 chroma_offset_x(unsigned location)
747 {
748 if (location & VL_COMPOSITOR_LOCATION_HORIZONTAL_LEFT)
749 return 0.5f;
750 else
751 return 0.0f;
752 }
753
754 static inline float
chroma_offset_y(unsigned location)755 chroma_offset_y(unsigned location)
756 {
757 if (location & VL_COMPOSITOR_LOCATION_VERTICAL_TOP)
758 return 0.5f;
759 else if (location & VL_COMPOSITOR_LOCATION_VERTICAL_BOTTOM)
760 return -0.5f;
761 else
762 return 0.0f;
763 }
764
/**
 * Upload the cs_viewport parameters into the shader parameter buffer.
 * The write order here must match the "ubo" std140 layout documented in
 * cs_create_shader().
 *
 * Returns false if the parameter buffer could not be mapped.
 */
static bool
set_viewport(struct vl_compositor_state *s,
             struct cs_viewport *drawn,
             struct pipe_sampler_view **samplers)
{
   struct pipe_transfer *buf_transfer;

   assert(s && drawn);

   void *ptr = pipe_buffer_map(s->pipe, s->shader_params,
                               PIPE_MAP_WRITE | PIPE_MAP_DISCARD_WHOLE_RESOURCE,
                               &buf_transfer);

   if (!ptr)
      return false;

   /* params[0-2]: csc_mat */
   memcpy(ptr, &s->csc_matrix, sizeof(vl_csc_matrix));

   /* params[3]: luma_min, luma_max, chroma_offset */
   float *ptr_float = (float *)ptr;
   ptr_float += sizeof(vl_csc_matrix) / sizeof(float);
   *ptr_float++ = s->luma_min;
   *ptr_float++ = s->luma_max;
   *ptr_float++ = drawn->chroma_offset_x;
   *ptr_float++ = drawn->chroma_offset_y;
   ptr_float += 2; /* pad */

   /* params[4].zw: integer translate */
   int *ptr_int = (int *)ptr_float;
   *ptr_int++ = drawn->translate_x;
   *ptr_int++ = drawn->translate_y;

   /* params[5].xy: sampler0 dimensions */
   ptr_float = (float *)ptr_int;
   *ptr_float++ = drawn->sampler0_w;
   *ptr_float++ = drawn->sampler0_h;

   /* params[5].zw: subsample_ratio.
    * compute_shader_video_buffer uses pixel coordinates based on the
    * Y sampler dimensions. If U/V are using separate planes and are
    * subsampled, we need to scale the coordinates */
   if (samplers[1]) {
      float h_ratio = samplers[1]->texture->width0 /
                      (float) samplers[0]->texture->width0;
      *ptr_float++ = h_ratio;
      float v_ratio = samplers[1]->texture->height0 /
                      (float) samplers[0]->texture->height0;
      *ptr_float++ = v_ratio;
   }
   else {
      *ptr_float++ = 1.0f;
      *ptr_float++ = 1.0f;
   }

   /* params[6]: coord_clamp, chroma_clamp */
   *ptr_float++ = drawn->clamp_x;
   *ptr_float++ = drawn->clamp_y;
   *ptr_float++ = drawn->chroma_clamp_x;
   *ptr_float++ = drawn->chroma_clamp_y;

   /* params[7-10]: proj and chroma_proj matrices */
   memcpy(ptr_float, drawn->proj, sizeof(drawn->proj));
   ptr_float += sizeof(drawn->proj) / sizeof(float);
   memcpy(ptr_float, drawn->chroma_proj, sizeof(drawn->chroma_proj));

   pipe_buffer_unmap(s->pipe, buf_transfer);

   return true;
}
828
829 static void
draw_layers(struct vl_compositor * c,struct vl_compositor_state * s,struct u_rect * dirty)830 draw_layers(struct vl_compositor *c,
831 struct vl_compositor_state *s,
832 struct u_rect *dirty)
833 {
834 unsigned i;
835
836 assert(c);
837
838 for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
839 if (s->used_layers & (1 << i)) {
840 struct vl_compositor_layer *layer = &s->layers[i];
841 struct pipe_sampler_view **samplers = &layer->sampler_views[0];
842 unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
843 struct pipe_sampler_view *sampler1 = samplers[1] ? samplers[1] : samplers[0];
844 struct cs_viewport drawn;
845
846 drawn.area = calc_drawn_area(s, layer);
847 drawn.translate_x = layer->viewport.translate[0];
848 drawn.translate_y = layer->viewport.translate[1];
849 drawn.sampler0_w = (float)layer->sampler_views[0]->texture->width0;
850 drawn.sampler0_h = (float)layer->sampler_views[0]->texture->height0;
851 drawn.clamp_x = (float)samplers[0]->texture->width0 * layer->src.br.x - 0.5;
852 drawn.clamp_y = (float)samplers[0]->texture->height0 * layer->src.br.y - 0.5;
853 drawn.chroma_clamp_x = (float)sampler1->texture->width0 * layer->src.br.x - 0.5;
854 drawn.chroma_clamp_y = (float)sampler1->texture->height0 * layer->src.br.y - 0.5;
855 drawn.chroma_offset_x = chroma_offset_x(s->chroma_location);
856 drawn.chroma_offset_y = chroma_offset_y(s->chroma_location);
857 calc_proj(layer, samplers[0]->texture, drawn.proj);
858 calc_proj(layer, sampler1->texture, drawn.chroma_proj);
859 set_viewport(s, &drawn, samplers);
860
861 c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
862 num_sampler_views, layer->samplers);
863 c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0,
864 num_sampler_views, 0, false, samplers);
865
866 cs_launch(c, layer->cs, &(drawn.area));
867
868 /* Unbind. */
869 c->pipe->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 0, 1, NULL);
870 c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, false, NULL);
871 c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0, 0,
872 num_sampler_views, false, NULL);
873 c->pipe->bind_compute_state(c->pipe, NULL);
874 c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
875 num_sampler_views, NULL);
876
877 if (dirty) {
878 struct u_rect drawn = calc_drawn_area(s, layer);
879 dirty->x0 = MIN2(drawn.x0, dirty->x0);
880 dirty->y0 = MIN2(drawn.y0, dirty->y0);
881 dirty->x1 = MAX2(drawn.x1, dirty->x1);
882 dirty->y1 = MAX2(drawn.y1, dirty->y1);
883 }
884 }
885 }
886 }
887
/**
 * Render all used layers of the compositor state to dst_surface using
 * the compute path.
 *
 * When clear_dirty is set and dirty_area covers a non-empty region, the
 * whole destination surface is cleared first and dirty_area is reset to
 * the empty rectangle before the layers are drawn.
 */
void
vl_compositor_cs_render(struct vl_compositor_state *s,
                        struct vl_compositor *c,
                        struct pipe_surface *dst_surface,
                        struct u_rect *dirty_area,
                        bool clear_dirty)
{
   assert(c && s);
   assert(dst_surface);

   c->fb_state.width = dst_surface->width;
   c->fb_state.height = dst_surface->height;
   c->fb_state.cbufs[0] = dst_surface;

   /* Without an explicit scissor, clip against the whole surface. */
   if (!s->scissor_valid) {
      s->scissor.minx = 0;
      s->scissor.miny = 0;
      s->scissor.maxx = dst_surface->width;
      s->scissor.maxy = dst_surface->height;
   }

   if (clear_dirty && dirty_area &&
       (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) {

      c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color,
                                   0, 0, dst_surface->width, dst_surface->height, false);
      dirty_area->x0 = dirty_area->y0 = VL_COMPOSITOR_MAX_DIRTY;
      dirty_area->x1 = dirty_area->y1 = VL_COMPOSITOR_MIN_DIRTY;
   }

   /* The parameter UBO is (re)filled per layer by set_viewport(). */
   pipe_set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, s->shader_params);

   draw_layers(c, s, dirty_area);
}
922
/**
 * Create all compute shader variants used by the compositor.
 *
 * Returns false on the first failure; shaders created up to that point
 * remain set and are released by vl_compositor_cs_cleanup_shaders(),
 * which handles partial initialization.
 */
bool vl_compositor_cs_init_shaders(struct vl_compositor *c)
{
   assert(c);

   c->cs_video_buffer = create_video_buffer_shader(c);
   if (!c->cs_video_buffer) {
      debug_printf("Unable to create video_buffer compute shader.\n");
      return false;
   }

   c->cs_weave_rgb = create_weave_shader(c, true, false);
   if (!c->cs_weave_rgb) {
      debug_printf("Unable to create weave_rgb compute shader.\n");
      return false;
   }

   c->cs_rgba = create_rgba_shader(c);
   if (!c->cs_rgba) {
      debug_printf("Unable to create rgba compute shader.\n");
      return false;
   }

   c->cs_yuv.weave.y = create_weave_shader(c, false, true);
   c->cs_yuv.weave.uv = create_weave_shader(c, false, false);
   c->cs_yuv.progressive.y = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_Y);
   c->cs_yuv.progressive.uv = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_UV);
   c->cs_yuv.progressive.u = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_U);
   c->cs_yuv.progressive.v = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_V);
   if (!c->cs_yuv.weave.y || !c->cs_yuv.weave.uv) {
      debug_printf("Unable to create YCbCr i-to-YCbCr p deint compute shader.\n");
      return false;
   }
   if (!c->cs_yuv.progressive.y || !c->cs_yuv.progressive.uv ||
       !c->cs_yuv.progressive.u || !c->cs_yuv.progressive.v) {
      debug_printf("Unable to create YCbCr p-to-NV12 compute shader.\n");
      return false;
   }

   c->cs_rgb_yuv.y = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_Y);
   c->cs_rgb_yuv.uv = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_UV);
   c->cs_rgb_yuv.u = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_U);
   c->cs_rgb_yuv.v = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_V);
   if (!c->cs_rgb_yuv.y || !c->cs_rgb_yuv.uv ||
       !c->cs_rgb_yuv.u || !c->cs_rgb_yuv.v) {
      debug_printf("Unable to create RGB-to-NV12 compute shader.\n");
      return false;
   }

   return true;
}
973
vl_compositor_cs_cleanup_shaders(struct vl_compositor * c)974 void vl_compositor_cs_cleanup_shaders(struct vl_compositor *c)
975 {
976 assert(c);
977
978 if (c->cs_video_buffer)
979 c->pipe->delete_compute_state(c->pipe, c->cs_video_buffer);
980 if (c->cs_weave_rgb)
981 c->pipe->delete_compute_state(c->pipe, c->cs_weave_rgb);
982 if (c->cs_rgba)
983 c->pipe->delete_compute_state(c->pipe, c->cs_rgba);
984 if (c->cs_yuv.weave.y)
985 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.y);
986 if (c->cs_yuv.weave.uv)
987 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.uv);
988 if (c->cs_yuv.progressive.y)
989 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.y);
990 if (c->cs_yuv.progressive.uv)
991 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.uv);
992 if (c->cs_yuv.progressive.u)
993 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.u);
994 if (c->cs_yuv.progressive.v)
995 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.v);
996 if (c->cs_rgb_yuv.y)
997 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.y);
998 if (c->cs_rgb_yuv.uv)
999 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.uv);
1000 if (c->cs_rgb_yuv.u)
1001 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.u);
1002 if (c->cs_rgb_yuv.v)
1003 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.v);
1004 }
1005