• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2019 Advanced Micro Devices, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  * Authors: James Zhu <james.zhu@amd.com>
27  *
28  **************************************************************************/
29 
30 #include <assert.h>
31 
32 #include "nir/nir_builder.h"
33 #include "vl_compositor_cs.h"
34 
35 struct cs_viewport {
36    struct u_rect area;
37    int translate_x; /* dst */
38    int translate_y;
39    float sampler0_w;
40    float sampler0_h;
41    float clamp_x;
42    float clamp_y;
43    float chroma_clamp_x;
44    float chroma_clamp_y;
45    float chroma_offset_x;
46    float chroma_offset_y;
47    float proj[2][4];
48    float chroma_proj[2][4];
49 };
50 
51 struct cs_shader {
52    nir_builder b;
53    const char *name;
54    bool array;
55    unsigned num_samplers;
56    nir_variable *samplers[3];
57    nir_variable *image;
58    nir_def *params[11];
59    nir_def *fone;
60    nir_def *fzero;
61 };
62 
63 enum coords_flags {
64    COORDS_LUMA          = 0x0,
65    COORDS_CHROMA        = 0x1,
66    COORDS_CHROMA_OFFSET = 0x2,
67 };
68 
cs_create_shader(struct vl_compositor * c,struct cs_shader * s)69 static nir_def *cs_create_shader(struct vl_compositor *c, struct cs_shader *s)
70 {
71    /*
72       #version 450
73 
74       layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
75       layout (binding = 0) uniform sampler2DRect samplers[3]; // or sampler2DArray
76       layout (binding = 0) uniform image2D image;
77 
78       layout (std140, binding = 0) uniform ubo
79       {
80          vec4 csc_mat[3];      // params[0-2]
81          float luma_min;       // params[3].x
82          float luma_max;       // params[3].y
83          vec2 chroma_offset;   // params[3].zw
84          ivec2 translate;      // params[4].zw
85          vec2 sampler0_wh;     // params[5].xy
86          vec2 subsample_ratio; // params[5].zw
87          vec2 coord_clamp;     // params[6].xy
88          vec2 chroma_clamp;    // params[6].zw
89          vec4 proj[3];         // params[7-8]
90          vec4 chroma_proj[3];  // params[9-10]
91       };
92 
93       void main()
94       {
95          ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
96       }
97    */
98    enum glsl_sampler_dim sampler_dim = s->array ? GLSL_SAMPLER_DIM_2D : GLSL_SAMPLER_DIM_RECT;
99    const struct glsl_type *sampler_type =
100       glsl_sampler_type(sampler_dim, /*is_shadow*/ false, s->array, GLSL_TYPE_FLOAT);
101    const struct glsl_type *image_type =
102       glsl_image_type(GLSL_SAMPLER_DIM_2D, /*is_array*/ false, GLSL_TYPE_FLOAT);
103    const nir_shader_compiler_options *options =
104       c->pipe->screen->get_compiler_options(c->pipe->screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);
105 
106    s->b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "vl:%s", s->name);
107    nir_builder *b = &s->b;
108    b->shader->info.workgroup_size[0] = 8;
109    b->shader->info.workgroup_size[1] = 8;
110    b->shader->info.workgroup_size[2] = 1;
111    b->shader->info.num_ubos = 1;
112    b->shader->num_uniforms = ARRAY_SIZE(s->params);
113 
114    nir_def *zero = nir_imm_int(b, 0);
115    for (unsigned i = 0; i < b->shader->num_uniforms; ++i)
116       s->params[i] = nir_load_ubo(b, 4, 32, zero, nir_imm_int(b, i * 16), .align_mul = 4, .range = ~0);
117 
118    for (unsigned i = 0; i < s->num_samplers; ++i) {
119       s->samplers[i] = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "sampler");
120       s->samplers[i]->data.binding = i;
121       BITSET_SET(b->shader->info.textures_used, i);
122       BITSET_SET(b->shader->info.samplers_used, i);
123    }
124 
125    s->image = nir_variable_create(b->shader, nir_var_image, image_type, "image");
126    s->image->data.binding = 0;
127    BITSET_SET(b->shader->info.images_used, 0);
128 
129    s->fone = nir_imm_float(b, 1.0f);
130    s->fzero = nir_imm_float(b, 0.0f);
131 
132    nir_def *block_ids = nir_load_workgroup_id(b);
133    nir_def *local_ids = nir_load_local_invocation_id(b);
134    return nir_iadd(b, nir_imul(b, block_ids, nir_imm_ivec3(b, 8, 8, 1)), local_ids);
135 }
136 
cs_create_shader_state(struct vl_compositor * c,struct cs_shader * s)137 static void *cs_create_shader_state(struct vl_compositor *c, struct cs_shader *s)
138 {
139    c->pipe->screen->finalize_nir(c->pipe->screen, s->b.shader);
140 
141    struct pipe_compute_state state = {0};
142    state.ir_type = PIPE_SHADER_IR_NIR;
143    state.prog = s->b.shader;
144 
145    /* create compute shader */
146    return c->pipe->create_compute_state(c->pipe, &state);
147 }
148 
cs_translate(struct cs_shader * s,nir_def * src)149 static inline nir_def *cs_translate(struct cs_shader *s, nir_def *src)
150 {
151    /*
152       return src.xy + params[4].zw;
153    */
154    nir_builder *b = &s->b;
155    return nir_iadd(b, src, nir_channels(b, s->params[4], 0x3 << 2));
156 }
157 
cs_texture_offset(struct cs_shader * s,nir_def * src)158 static inline nir_def *cs_texture_offset(struct cs_shader *s, nir_def *src)
159 {
160    /*
161       return src.xy + 0.5;
162    */
163    nir_builder *b = &s->b;
164    return nir_fadd_imm(b, src, 0.5f);
165 }
166 
cs_chroma_subsampling(struct cs_shader * s,nir_def * src)167 static inline nir_def *cs_chroma_subsampling(struct cs_shader *s, nir_def *src)
168 {
169    /*
170       return src.xy * params[5].zw;
171    */
172    nir_builder *b = &s->b;
173    return nir_fmul(b, src, nir_channels(b, s->params[5], 0x3 << 2));
174 }
175 
cs_proj(struct cs_shader * s,nir_def * src,unsigned flags)176 static inline nir_def *cs_proj(struct cs_shader *s, nir_def *src, unsigned flags)
177 {
178    /*
179       uint idx = flags & COORDS_CHROMA ? 9 : 7;
180       float x = dot(src.xy, params[idx]);
181       float y = dot(src.xy, params[idx + 1]);
182       return vec3(x, y, 1.0);
183    */
184    nir_builder *b = &s->b;
185    unsigned idx = flags & COORDS_CHROMA ? 9 : 7;
186    src = nir_vector_insert_imm(b, src, s->fone, 2);
187    nir_def *x = nir_fdot3(b, src, s->params[idx]);
188    nir_def *y = nir_fdot3(b, src, s->params[idx + 1]);
189    return nir_vec3(b, x, y, s->fzero);
190 }
191 
cs_luma_key(struct cs_shader * s,nir_def * src)192 static inline nir_def *cs_luma_key(struct cs_shader *s, nir_def *src)
193 {
194    /*
195       bool luma_min = params[3].x >= src;
196       bool luma_max = params[3].y < src;
197       return float(luma_min || luma_max);
198    */
199    nir_builder *b = &s->b;
200    nir_def *luma_min = nir_fge(b, nir_channel(b, s->params[3], 0), src);
201    nir_def *luma_max = nir_flt(b, nir_channel(b, s->params[3], 1), src);
202    return nir_b2f32(b, nir_ior(b, luma_min, luma_max));
203 }
204 
cs_chroma_offset(struct cs_shader * s,nir_def * src,unsigned flags)205 static inline nir_def *cs_chroma_offset(struct cs_shader *s, nir_def *src, unsigned flags)
206 {
207    /*
208       vec2 offset = params[3].zw;
209       if (flags & COORDS_CHROMA)
210          return src.xy + offset;
211       return offset * -0.5 + src.xy;
212    */
213    nir_builder *b = &s->b;
214    nir_def *offset = nir_channels(b, s->params[3], 0x3 << 2);
215    if (flags & COORDS_CHROMA)
216       return nir_fadd(b, src, offset);
217    return nir_ffma_imm1(b, offset, -0.5f, src);
218 }
219 
cs_clamp(struct cs_shader * s,nir_def * src,unsigned flags)220 static inline nir_def *cs_clamp(struct cs_shader *s, nir_def *src, unsigned flags)
221 {
222    /*
223       vec2 coord_max;
224       if (flags & COORDS_CHROMA)
225          coord_max = params[6].zw;
226       else
227          coord_max = params[6].xy;
228       return min(src.xy, coord_max);
229    */
230    nir_builder *b = &s->b;
231    nir_component_mask_t mask = flags & COORDS_CHROMA ? 0x3 << 2 : 0x3;
232    return nir_fmin(b, src, nir_channels(b, s->params[6], mask));
233 }
234 
cs_normalize(struct cs_shader * s,nir_def * src,unsigned flags)235 static inline nir_def *cs_normalize(struct cs_shader *s, nir_def *src, unsigned flags)
236 {
237    /*
238       vec2 div = params[5].xy;
239       if (flags & COORDS_CHROMA)
240          div = cs_chroma_subsampling(div);
241       return src.xy / div;
242    */
243    nir_builder *b = &s->b;
244    nir_def *div = nir_channels(b, s->params[5], 0x3);
245    if (flags & COORDS_CHROMA)
246       div = cs_chroma_subsampling(s, div);
247    return nir_fdiv(b, src, div);
248 }
249 
cs_color_space_conversion(struct cs_shader * s,nir_def * src,unsigned comp)250 static inline nir_def *cs_color_space_conversion(struct cs_shader *s, nir_def *src, unsigned comp)
251 {
252    /*
253       return dot(src, params[comp]);
254    */
255    nir_builder *b = &s->b;
256    return nir_fdot4(b, src, s->params[comp]);
257 }
258 
cs_fetch_texel(struct cs_shader * s,nir_def * coords,unsigned sampler)259 static inline nir_def *cs_fetch_texel(struct cs_shader *s, nir_def *coords, unsigned sampler)
260 {
261    /*
262       return texture(samplers[sampler], s->array ? coords.xyz : coords.xy);
263    */
264    nir_builder *b = &s->b;
265    nir_deref_instr *tex_deref = nir_build_deref_var(b, s->samplers[sampler]);
266    nir_component_mask_t mask = s->array ? 0x7 : 0x3;
267    return nir_tex_deref(b, tex_deref, tex_deref, nir_channels(b, coords, mask));
268 }
269 
cs_image_load(struct cs_shader * s,nir_def * pos)270 static inline nir_def *cs_image_load(struct cs_shader *s, nir_def *pos)
271 {
272    /*
273       imageLoad(image, pos.xy);
274    */
275    nir_builder *b = &s->b;
276    nir_def *zero = nir_imm_int(b, 0);
277    nir_def *sample = nir_imm_int(b, 0);
278    pos = nir_pad_vector_imm_int(b, pos, 0, 4);
279    enum glsl_sampler_dim sampler_dim = s->array ? GLSL_SAMPLER_DIM_2D : GLSL_SAMPLER_DIM_RECT;
280    return nir_image_deref_load(b, 4, 32, &nir_build_deref_var(b, s->image)->def, pos, sample, zero,
281                                .image_dim = sampler_dim,
282                                .image_array = s->array);
283 }
284 
cs_image_store(struct cs_shader * s,nir_def * pos,nir_def * color)285 static inline void cs_image_store(struct cs_shader *s, nir_def *pos, nir_def *color)
286 {
287    /*
288       imageStore(image, pos.xy, color);
289    */
290    nir_builder *b = &s->b;
291    nir_def *zero = nir_imm_int(b, 0);
292    nir_def *undef32 = nir_undef(b, 1, 32);
293    pos = nir_pad_vector_imm_int(b, pos, 0, 4);
294    nir_image_deref_store(b, &nir_build_deref_var(b, s->image)->def, pos, undef32, color, zero);
295 }
296 
cs_tex_coords(struct cs_shader * s,nir_def * coords,unsigned flags)297 static nir_def *cs_tex_coords(struct cs_shader *s, nir_def *coords, unsigned flags)
298 {
299    nir_builder *b = &s->b;
300 
301    coords = nir_u2f32(b, coords);
302    coords = cs_texture_offset(s, coords);
303 
304    if (flags & COORDS_CHROMA_OFFSET)
305       coords = cs_chroma_offset(s, coords, flags);
306 
307    if (flags & COORDS_CHROMA)
308       coords = cs_chroma_subsampling(s, coords);
309 
310    coords = cs_proj(s, coords, flags);
311    coords = cs_clamp(s, coords, flags);
312 
313    return coords;
314 }
315 
create_video_buffer_shader(struct vl_compositor * c)316 static void *create_video_buffer_shader(struct vl_compositor *c)
317 {
318    struct cs_shader s = {
319       .name = "video_buffer",
320       .num_samplers = 3,
321    };
322    nir_builder *b = &s.b;
323 
324    nir_def *ipos = cs_create_shader(c, &s);
325    nir_def *pos[2] = {
326       cs_tex_coords(&s, ipos, COORDS_LUMA),
327       cs_tex_coords(&s, ipos, COORDS_CHROMA | COORDS_CHROMA_OFFSET),
328    };
329 
330    nir_def *col[3];
331    for (unsigned i = 0; i < 3; ++i)
332       col[i] = cs_fetch_texel(&s, pos[MIN2(i, 1)], i);
333 
334    nir_def *alpha = cs_luma_key(&s, col[2]);
335 
336    nir_def *color = nir_vec4(b, col[0], col[1], col[2], s.fone);
337    for (unsigned i = 0; i < 3; ++i)
338       col[i] = cs_color_space_conversion(&s, color, i);
339 
340    color = nir_vec4(b, col[0], col[1], col[2], alpha);
341    cs_image_store(&s, cs_translate(&s, ipos), color);
342 
343    return cs_create_shader_state(c, &s);
344 }
345 
create_yuv_progressive_shader(struct vl_compositor * c,enum vl_compositor_plane plane)346 static void *create_yuv_progressive_shader(struct vl_compositor *c, enum vl_compositor_plane plane)
347 {
348    struct cs_shader s = {
349       .name = "yuv_progressive",
350       .num_samplers = 3,
351    };
352    nir_builder *b = &s.b;
353 
354    nir_def *ipos = cs_create_shader(c, &s);
355    nir_def *pos = cs_tex_coords(&s, ipos, plane == VL_COMPOSITOR_PLANE_Y ? COORDS_LUMA : COORDS_CHROMA);
356 
357    nir_def *color;
358    if (plane != VL_COMPOSITOR_PLANE_UV) {
359       unsigned c = 0;
360       if (plane == VL_COMPOSITOR_PLANE_U)
361          c = 1;
362       else if (plane == VL_COMPOSITOR_PLANE_V)
363          c = 2;
364       color = nir_channel(b, cs_fetch_texel(&s, pos, c), c);
365    } else {
366       nir_def *col1 = cs_fetch_texel(&s, pos, 1);
367       nir_def *col2 = cs_fetch_texel(&s, pos, 2);
368       color = nir_vec2(b, col1, col2);
369    }
370 
371    cs_image_store(&s, cs_translate(&s, ipos), color);
372 
373    return cs_create_shader_state(c, &s);
374 }
375 
create_rgb_yuv_shader(struct vl_compositor * c,enum vl_compositor_plane plane)376 static void *create_rgb_yuv_shader(struct vl_compositor *c, enum vl_compositor_plane plane)
377 {
378    struct cs_shader s = {
379       .name = "rgb_yuv",
380       .num_samplers = 1,
381    };
382    nir_builder *b = &s.b;
383 
384    nir_def *ipos = cs_create_shader(c, &s);
385    nir_def *color = NULL;
386 
387    if (plane == VL_COMPOSITOR_PLANE_Y) {
388       nir_def *pos = cs_tex_coords(&s, ipos, COORDS_LUMA);
389       color = cs_fetch_texel(&s, pos, 0);
390    } else {
391       /*
392          vec2 pos[4];
393          pos[0] = vec2(ipos);
394          pos[0] = cs_texture_offset(pos[0]);
395          pos[0] = cs_chroma_offset(pos[0], COORDS_LUMA);
396 
397          // Sample offset
398          pos[3] = pos[0] + vec2( 0.25, -0.25);
399          pos[2] = pos[0] + vec2(-0.25,  0.25);
400          pos[1] = pos[0] + vec2(-0.25, -0.25);
401          pos[0] = pos[0] + vec2( 0.25,  0.25);
402 
403          vec4 col[4];
404          for (uint i = 0; i < 4; ++i) {
405             pos[i] = cs_proj(pos[i], COORDS_LUMA);
406             pos[i] = cs_clamp(pos[i], COORDS_LUMA);
407             col[i] = texture(samp[0], pos[i]);
408          }
409          color = (col[0] + col[1] + col[2] + col[3]) * 0.25;
410       */
411       nir_def *pos[4];
412       pos[0] = nir_u2f32(b, ipos);
413       pos[0] = cs_texture_offset(&s, pos[0]);
414       pos[0] = cs_chroma_offset(&s, pos[0], COORDS_LUMA);
415 
416       /* Sample offset */
417       nir_def *o_plus = nir_imm_float(b, 0.25f);
418       nir_def *o_minus = nir_imm_float(b, -0.25f);
419       pos[3] = nir_fadd(b, pos[0], nir_vec2(b, o_plus, o_minus));
420       pos[2] = nir_fadd(b, pos[0], nir_vec2(b, o_minus, o_plus));
421       pos[1] = nir_fadd(b, pos[0], nir_vec2(b, o_minus, o_minus));
422       pos[0] = nir_fadd(b, pos[0], nir_vec2(b, o_plus, o_plus));
423 
424       for (unsigned i = 0; i < 4; ++i) {
425          pos[i] = cs_proj(&s, pos[i], COORDS_LUMA);
426          pos[i] = cs_clamp(&s, pos[i], COORDS_LUMA);
427 
428          nir_def *c = cs_fetch_texel(&s, pos[i], 0);
429          color = color ? nir_fadd(b, color, c) : c;
430       }
431       color = nir_fmul_imm(b, color, 0.25f);
432    }
433 
434    color = nir_vector_insert_imm(b, color, s.fone, 3);
435 
436    if (plane != VL_COMPOSITOR_PLANE_UV) {
437       unsigned c = 0;
438       if (plane == VL_COMPOSITOR_PLANE_U)
439          c = 1;
440       else if (plane == VL_COMPOSITOR_PLANE_V)
441          c = 2;
442       color = cs_color_space_conversion(&s, color, c);
443    } else {
444       nir_def *col1 = cs_color_space_conversion(&s, color, 1);
445       nir_def *col2 = cs_color_space_conversion(&s, color, 2);
446       color = nir_vec2(b, col1, col2);
447    }
448 
449    cs_image_store(&s, cs_translate(&s, ipos), color);
450 
451    return cs_create_shader_state(c, &s);
452 }
453 
create_weave_shader(struct vl_compositor * c,bool rgb,bool y)454 static nir_def *create_weave_shader(struct vl_compositor *c, bool rgb, bool y)
455 {
456    struct cs_shader s = {
457       .name = rgb ? "weave" : y ? "yuv_weave_y" : "yuv_weave_uv",
458       .array = true,
459       .num_samplers = 3,
460    };
461    nir_builder *b = &s.b;
462 
463    nir_def *ipos = cs_create_shader(c, &s);
464 
465    /*
466       vec2 top_y = cs_texture_offset(vec2(ipos));
467       vec2 top_uv = rgb ? cs_chroma_offset(top_y, COORDS_CHROMA) : top_y;
468       top_uv = cs_chroma_subsampling(top_uv);
469       vec2 down_y = top_y;
470       vec2 down_uv = top_uv;
471 
472       top_y = cs_proj(top_y, COORDS_LUMA);
473       top_uv = cs_proj(top_uv, COORDS_CHROMA);
474       down_y = cs_proj(down_y, COORDS_LUMA);
475       down_uv = cs_proj(down_uv, COORDS_CHROMA);
476 
477       // Weave offset
478       top_y = top_y + vec2(0.0, 0.25);
479       top_uv = top_uv + vec2(0.0, 0.25);
480       down_y = down_y + vec2(0.0, -0.25);
481       down_uv = down_uv + vec2(0.0, -0.25);
482 
483       // Texture layer
484       vec3 tex_layer = vec3(top_y.y, top_uv.y, top_uv.y);
485       tex_layer = tex_layer + round(tex_layer) * -1.0;
486       tex_layer = abs(tex_layer) * 2.0;
487 
488       top_y = cs_clamp(top_y, COORDS_LUMA);
489       top_y = cs_normalize(top_y, COORDS_LUMA);
490       top_uv = cs_clamp(top_uv, COORDS_CHROMA);
491       top_uv = cs_normalize(top_uv, COORDS_CHROMA);
492       down_y = cs_clamp(down_y, COORDS_LUMA);
493       down_y = cs_normalize(down_y, COORDS_LUMA);
494       down_uv = cs_clamp(down_uv, COORDS_CHROMA);
495       down_uv = cs_normalize(down_uv, COORDS_CHROMA);
496 
497       vec4 top_col, down_col;
498       top_col.x = texture(samp[0], vec3(top_y, 0.0)).x;
499       top_col.y = texture(samp[1], vec3(top_uv, 0.0)).x;
500       top_col.z = texture(samp[2], vec3(top_uv, 0.0)).x;
501       top_col.w = 1.0;
502       down_col.x = texture(samp[0], vec3(down_y, 1.0)).x;
503       down_col.y = texture(samp[1], vec3(down_uv, 1.0)).x;
504       down_col.z = texture(samp[2], vec3(down_uv, 1.0)).x;
505       down_col.w = 1.0;
506 
507       vec4 color = mix(down_col, top_col, tex_layer);
508    */
509    nir_def *pos[4];
510    /* Top Y */
511    pos[0] = nir_u2f32(b, ipos);
512    pos[0] = cs_texture_offset(&s, pos[0]);
513    /* Top UV */
514    pos[1] = rgb ? cs_chroma_offset(&s, pos[0], COORDS_CHROMA) : pos[0];
515    pos[1] = cs_chroma_subsampling(&s, pos[1]);
516    /* Down Y */
517    pos[2] = pos[0];
518    /* Down UV */
519    pos[3] = pos[1];
520 
521    /* Weave offset */
522    nir_def *o_plus = nir_imm_vec2(b, 0.0f, 0.25f);
523    nir_def *o_minus = nir_imm_vec2(b, 0.0f, -0.25f);
524    for (unsigned i = 0; i < 4; ++i) {
525       pos[i] = cs_proj(&s, pos[i], i % 2 ? COORDS_CHROMA : COORDS_LUMA);
526       pos[i] = nir_fadd(b, pos[i], i < 2 ? o_plus : o_minus);
527    }
528 
529    /* Texture layer */
530    nir_def *tex_layer = nir_vec3(b,
531                                  nir_channel(b, pos[0], 1),
532                                  nir_channel(b, pos[1], 1),
533                                  nir_channel(b, pos[1], 1));
534    tex_layer = nir_fadd(b, tex_layer,
535                         nir_fneg(b, nir_fround_even(b, tex_layer)));
536    tex_layer = nir_fabs(b, tex_layer);
537    tex_layer = nir_fmul_imm(b, tex_layer, 2.0f);
538 
539    nir_def *col[6];
540    for (unsigned i = 0; i < 4; ++i) {
541       bool top = i < 2;
542       unsigned j = top ? 0 : 3;
543       unsigned flags = i % 2 ? COORDS_CHROMA : COORDS_LUMA;
544       pos[i] = cs_clamp(&s, pos[i], flags);
545       pos[i] = cs_normalize(&s, pos[i], flags);
546       pos[i] = nir_vector_insert_imm(b, pos[i],
547                                      top ? s.fzero : s.fone, 2);
548       if (flags == COORDS_LUMA) {
549          col[j] = cs_fetch_texel(&s, pos[i], 0);
550       } else {
551          col[j + 1] = cs_fetch_texel(&s, pos[i], 1);
552          col[j + 2] = cs_fetch_texel(&s, pos[i], 2);
553       }
554    }
555 
556    nir_def *color_top = nir_vec4(b, col[0], col[1], col[2], s.fone);
557    nir_def *color_down = nir_vec4(b, col[3], col[4], col[5], s.fone);
558    nir_def *color = nir_flrp(b, color_down, color_top, tex_layer);
559 
560    if (rgb) {
561       nir_def *alpha = cs_luma_key(&s, nir_channel(b, color, 2));
562       for (unsigned i = 0; i < 3; ++i)
563          col[i] = cs_color_space_conversion(&s, color, i);
564       color = nir_vec4(b, col[0], col[1], col[2], alpha);
565    } else if (y) {
566       color = nir_channel(b, color, 0);
567    } else {
568       nir_def *col1 = nir_channel(b, color, 1);
569       nir_def *col2 = nir_channel(b, color, 2);
570       color = nir_vec2(b, col1, col2);
571    }
572 
573    cs_image_store(&s, cs_translate(&s, ipos), color);
574 
575    return cs_create_shader_state(c, &s);
576 }
577 
create_rgba_shader(struct vl_compositor * c)578 static void *create_rgba_shader(struct vl_compositor *c)
579 {
580    struct cs_shader s = {
581       .name = "rgba",
582       .num_samplers = 1,
583    };
584    nir_builder *b = &s.b;
585 
586    nir_def *ipos = cs_create_shader(c, &s);
587    nir_def *pos = cs_tex_coords(&s, ipos, COORDS_LUMA);
588    nir_def *pos_out = cs_translate(&s, ipos);
589 
590    nir_def *col = cs_fetch_texel(&s, pos, 0);
591    nir_def *blend = cs_image_load(&s, pos_out);
592 
593    nir_def *color = nir_flrp(b, blend, col, nir_channel(b, col, 3));
594    color = nir_vector_insert_imm(b, color, s.fone, 3);
595 
596    cs_image_store(&s, pos_out, color);
597 
598    return cs_create_shader_state(c, &s);
599 }
600 
601 static void
cs_launch(struct vl_compositor * c,void * cs,const struct u_rect * draw_area)602 cs_launch(struct vl_compositor *c,
603           void                 *cs,
604           const struct u_rect  *draw_area)
605 {
606    struct pipe_context *ctx = c->pipe;
607    unsigned width, height;
608 
609    width = draw_area->x1 - draw_area->x0;
610    height = draw_area->y1 - draw_area->y0;
611 
612    /* Bind the image */
613    struct pipe_image_view image = {0};
614    image.resource = c->fb_state.cbufs[0]->texture;
615    image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE;
616    image.format = c->fb_state.cbufs[0]->texture->format;
617 
618    ctx->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, 0, &image);
619 
620    /* Bind compute shader */
621    ctx->bind_compute_state(ctx, cs);
622 
623    /* Dispatch compute */
624    struct pipe_grid_info info = {0};
625    info.block[0] = 8;
626    info.last_block[0] = width % info.block[0];
627    info.block[1] = 8;
628    info.last_block[1] = height % info.block[1];
629    info.block[2] = 1;
630    info.grid[0] = DIV_ROUND_UP(width, info.block[0]);
631    info.grid[1] = DIV_ROUND_UP(height, info.block[1]);
632    info.grid[2] = 1;
633 
634    ctx->launch_grid(ctx, &info);
635 
636    /* Make the result visible to all clients. */
637    ctx->memory_barrier(ctx, PIPE_BARRIER_ALL);
638 
639 }
640 
641 static inline struct u_rect
calc_drawn_area(struct vl_compositor_state * s,struct vl_compositor_layer * layer)642 calc_drawn_area(struct vl_compositor_state *s,
643                 struct vl_compositor_layer *layer)
644 {
645    struct vertex2f tl, br;
646    struct u_rect result;
647 
648    assert(s && layer);
649 
650    tl = layer->dst.tl;
651    br = layer->dst.br;
652 
653    /* Scale */
654    result.x0 = tl.x * layer->viewport.scale[0] + layer->viewport.translate[0];
655    result.y0 = tl.y * layer->viewport.scale[1] + layer->viewport.translate[1];
656    result.x1 = br.x * layer->viewport.scale[0] + layer->viewport.translate[0];
657    result.y1 = br.y * layer->viewport.scale[1] + layer->viewport.translate[1];
658 
659    /* Clip */
660    result.x0 = MAX2(result.x0, s->scissor.minx);
661    result.y0 = MAX2(result.y0, s->scissor.miny);
662    result.x1 = MIN2(result.x1, s->scissor.maxx);
663    result.y1 = MIN2(result.y1, s->scissor.maxy);
664    return result;
665 }
666 
667 static inline void
calc_proj(struct vl_compositor_layer * layer,struct pipe_resource * texture,float m[2][4])668 calc_proj(struct vl_compositor_layer *layer,
669           struct pipe_resource *texture,
670           float m[2][4])
671 {
672    enum vl_compositor_mirror mirror = layer->mirror;
673    float ratio_x = (float)texture->width0 / layer->sampler_views[0]->texture->width0;
674    float ratio_y = (float)texture->height0 / layer->sampler_views[0]->texture->height0;
675    float width = layer->sampler_views[0]->texture->width0;
676    float height = layer->sampler_views[0]->texture->height0;
677    float translate_x = texture->width0 * ratio_x;
678    float translate_y = texture->height0 * ratio_y;
679 
680    memset(m, 0, sizeof(float) * 2 * 4);
681 
682    switch (layer->rotate) {
683    default:
684    case VL_COMPOSITOR_ROTATE_0:
685       m[0][0] = 1.0;
686       m[1][1] = 1.0;
687       break;
688    case VL_COMPOSITOR_ROTATE_90:
689       m[0][1] = 1.0;
690       m[1][0] = -1.0;
691       m[1][2] = translate_y;
692       width = layer->sampler_views[0]->texture->height0;
693       height = layer->sampler_views[0]->texture->width0;
694       break;
695    case VL_COMPOSITOR_ROTATE_180:
696       m[0][0] = 1.0;
697       m[1][1] = 1.0;
698       if (mirror != VL_COMPOSITOR_MIRROR_VERTICAL)
699          mirror = VL_COMPOSITOR_MIRROR_VERTICAL;
700       else
701          mirror = VL_COMPOSITOR_MIRROR_HORIZONTAL;
702       break;
703    case VL_COMPOSITOR_ROTATE_270:
704       m[0][1] = -1.0;
705       m[1][0] = 1.0;
706       m[0][2] = translate_x;
707       width = layer->sampler_views[0]->texture->height0;
708       height = layer->sampler_views[0]->texture->width0;
709       break;
710    }
711 
712    switch (mirror) {
713    default:
714    case VL_COMPOSITOR_MIRROR_NONE:
715       break;
716    case VL_COMPOSITOR_MIRROR_HORIZONTAL:
717       m[0][0] *= -1;
718       m[0][1] *= -1;
719       m[0][2] = translate_x - m[0][2];
720       break;
721    case VL_COMPOSITOR_MIRROR_VERTICAL:
722       m[1][0] *= -1;
723       m[1][1] *= -1;
724       m[1][2] = translate_y - m[1][2];
725       break;
726    }
727 
728    float scale_x = (width * (layer->src.br.x - layer->src.tl.x)) / layer->viewport.scale[0];
729    float scale_y = (height * (layer->src.br.y - layer->src.tl.y)) / layer->viewport.scale[1];
730 
731    m[0][0] *= scale_x;
732    m[0][1] *= scale_x;
733    m[0][2] *= scale_x;
734    m[1][0] *= scale_y;
735    m[1][1] *= scale_y;
736    m[1][2] *= scale_y;
737 
738    float crop_x = (layer->src.tl.x * width) * ratio_x;
739    float crop_y = (layer->src.tl.y * height) * ratio_y;
740 
741    m[0][2] += crop_x;
742    m[1][2] += crop_y;
743 }
744 
745 static inline float
chroma_offset_x(unsigned location)746 chroma_offset_x(unsigned location)
747 {
748    if (location & VL_COMPOSITOR_LOCATION_HORIZONTAL_LEFT)
749       return 0.5f;
750    else
751       return 0.0f;
752 }
753 
754 static inline float
chroma_offset_y(unsigned location)755 chroma_offset_y(unsigned location)
756 {
757    if (location & VL_COMPOSITOR_LOCATION_VERTICAL_TOP)
758       return 0.5f;
759    else if (location & VL_COMPOSITOR_LOCATION_VERTICAL_BOTTOM)
760       return -0.5f;
761    else
762       return 0.0f;
763 }
764 
765 static bool
set_viewport(struct vl_compositor_state * s,struct cs_viewport * drawn,struct pipe_sampler_view ** samplers)766 set_viewport(struct vl_compositor_state *s,
767              struct cs_viewport         *drawn,
768              struct pipe_sampler_view **samplers)
769 {
770    struct pipe_transfer *buf_transfer;
771 
772    assert(s && drawn);
773 
774    void *ptr = pipe_buffer_map(s->pipe, s->shader_params,
775                                PIPE_MAP_WRITE | PIPE_MAP_DISCARD_WHOLE_RESOURCE,
776                                &buf_transfer);
777 
778    if (!ptr)
779      return false;
780 
781    memcpy(ptr, &s->csc_matrix, sizeof(vl_csc_matrix));
782 
783    float *ptr_float = (float *)ptr;
784    ptr_float += sizeof(vl_csc_matrix) / sizeof(float);
785    *ptr_float++ = s->luma_min;
786    *ptr_float++ = s->luma_max;
787    *ptr_float++ = drawn->chroma_offset_x;
788    *ptr_float++ = drawn->chroma_offset_y;
789    ptr_float += 2; /* pad */
790 
791    int *ptr_int = (int *)ptr_float;
792    *ptr_int++ = drawn->translate_x;
793    *ptr_int++ = drawn->translate_y;
794 
795    ptr_float = (float *)ptr_int;
796    *ptr_float++ = drawn->sampler0_w;
797    *ptr_float++ = drawn->sampler0_h;
798 
799    /* compute_shader_video_buffer uses pixel coordinates based on the
800     * Y sampler dimensions. If U/V are using separate planes and are
801     * subsampled, we need to scale the coordinates */
802    if (samplers[1]) {
803       float h_ratio = samplers[1]->texture->width0 /
804                      (float) samplers[0]->texture->width0;
805       *ptr_float++ = h_ratio;
806       float v_ratio = samplers[1]->texture->height0 /
807                      (float) samplers[0]->texture->height0;
808       *ptr_float++ = v_ratio;
809    }
810    else {
811       *ptr_float++ = 1.0f;
812       *ptr_float++ = 1.0f;
813    }
814 
815    *ptr_float++ = drawn->clamp_x;
816    *ptr_float++ = drawn->clamp_y;
817    *ptr_float++ = drawn->chroma_clamp_x;
818    *ptr_float++ = drawn->chroma_clamp_y;
819 
820    memcpy(ptr_float, drawn->proj, sizeof(drawn->proj));
821    ptr_float += sizeof(drawn->proj) / sizeof(float);
822    memcpy(ptr_float, drawn->chroma_proj, sizeof(drawn->chroma_proj));
823 
824    pipe_buffer_unmap(s->pipe, buf_transfer);
825 
826    return true;
827 }
828 
829 static void
draw_layers(struct vl_compositor * c,struct vl_compositor_state * s,struct u_rect * dirty)830 draw_layers(struct vl_compositor       *c,
831             struct vl_compositor_state *s,
832             struct u_rect              *dirty)
833 {
834    unsigned i;
835 
836    assert(c);
837 
838    for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
839       if (s->used_layers & (1 << i)) {
840          struct vl_compositor_layer *layer = &s->layers[i];
841          struct pipe_sampler_view **samplers = &layer->sampler_views[0];
842          unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
843          struct pipe_sampler_view *sampler1 = samplers[1] ? samplers[1] : samplers[0];
844          struct cs_viewport drawn;
845 
846          drawn.area = calc_drawn_area(s, layer);
847          drawn.translate_x = layer->viewport.translate[0];
848          drawn.translate_y = layer->viewport.translate[1];
849          drawn.sampler0_w = (float)layer->sampler_views[0]->texture->width0;
850          drawn.sampler0_h = (float)layer->sampler_views[0]->texture->height0;
851          drawn.clamp_x = (float)samplers[0]->texture->width0 * layer->src.br.x - 0.5;
852          drawn.clamp_y = (float)samplers[0]->texture->height0 * layer->src.br.y - 0.5;
853          drawn.chroma_clamp_x = (float)sampler1->texture->width0 * layer->src.br.x - 0.5;
854          drawn.chroma_clamp_y = (float)sampler1->texture->height0 * layer->src.br.y - 0.5;
855          drawn.chroma_offset_x = chroma_offset_x(s->chroma_location);
856          drawn.chroma_offset_y = chroma_offset_y(s->chroma_location);
857          calc_proj(layer, samplers[0]->texture, drawn.proj);
858          calc_proj(layer, sampler1->texture, drawn.chroma_proj);
859          set_viewport(s, &drawn, samplers);
860 
861          c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
862                         num_sampler_views, layer->samplers);
863          c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0,
864                         num_sampler_views, 0, false, samplers);
865 
866          cs_launch(c, layer->cs, &(drawn.area));
867 
868          /* Unbind. */
869          c->pipe->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 0, 1, NULL);
870          c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, false, NULL);
871          c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0, 0,
872                         num_sampler_views, false, NULL);
873          c->pipe->bind_compute_state(c->pipe, NULL);
874          c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
875                         num_sampler_views, NULL);
876 
877          if (dirty) {
878             struct u_rect drawn = calc_drawn_area(s, layer);
879             dirty->x0 = MIN2(drawn.x0, dirty->x0);
880             dirty->y0 = MIN2(drawn.y0, dirty->y0);
881             dirty->x1 = MAX2(drawn.x1, dirty->x1);
882             dirty->y1 = MAX2(drawn.y1, dirty->y1);
883          }
884       }
885    }
886 }
887 
888 void
vl_compositor_cs_render(struct vl_compositor_state * s,struct vl_compositor * c,struct pipe_surface * dst_surface,struct u_rect * dirty_area,bool clear_dirty)889 vl_compositor_cs_render(struct vl_compositor_state *s,
890                         struct vl_compositor       *c,
891                         struct pipe_surface        *dst_surface,
892                         struct u_rect              *dirty_area,
893                         bool                        clear_dirty)
894 {
895    assert(c && s);
896    assert(dst_surface);
897 
898    c->fb_state.width = dst_surface->width;
899    c->fb_state.height = dst_surface->height;
900    c->fb_state.cbufs[0] = dst_surface;
901 
902    if (!s->scissor_valid) {
903       s->scissor.minx = 0;
904       s->scissor.miny = 0;
905       s->scissor.maxx = dst_surface->width;
906       s->scissor.maxy = dst_surface->height;
907    }
908 
909    if (clear_dirty && dirty_area &&
910        (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) {
911 
912       c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color,
913                        0, 0, dst_surface->width, dst_surface->height, false);
914       dirty_area->x0 = dirty_area->y0 = VL_COMPOSITOR_MAX_DIRTY;
915       dirty_area->x1 = dirty_area->y1 = VL_COMPOSITOR_MIN_DIRTY;
916    }
917 
918    pipe_set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, s->shader_params);
919 
920    draw_layers(c, s, dirty_area);
921 }
922 
vl_compositor_cs_init_shaders(struct vl_compositor * c)923 bool vl_compositor_cs_init_shaders(struct vl_compositor *c)
924 {
925         assert(c);
926 
927         c->cs_video_buffer = create_video_buffer_shader(c);
928         if (!c->cs_video_buffer) {
929                 debug_printf("Unable to create video_buffer compute shader.\n");
930                 return false;
931         }
932 
933         c->cs_weave_rgb = create_weave_shader(c, true, false);
934         if (!c->cs_weave_rgb) {
935                 debug_printf("Unable to create weave_rgb compute shader.\n");
936                 return false;
937         }
938 
939         c->cs_rgba = create_rgba_shader(c);
940         if (!c->cs_rgba) {
941                 debug_printf("Unable to create rgba compute shader.\n");
942                 return false;
943         }
944 
945         c->cs_yuv.weave.y = create_weave_shader(c, false, true);
946         c->cs_yuv.weave.uv = create_weave_shader(c, false, false);
947         c->cs_yuv.progressive.y = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_Y);
948         c->cs_yuv.progressive.uv = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_UV);
949         c->cs_yuv.progressive.u = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_U);
950         c->cs_yuv.progressive.v = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_V);
951         if (!c->cs_yuv.weave.y || !c->cs_yuv.weave.uv) {
952                 debug_printf("Unable to create YCbCr i-to-YCbCr p deint compute shader.\n");
953                 return false;
954         }
955         if (!c->cs_yuv.progressive.y || !c->cs_yuv.progressive.uv ||
956             !c->cs_yuv.progressive.u || !c->cs_yuv.progressive.v) {
957                 debug_printf("Unable to create YCbCr p-to-NV12 compute shader.\n");
958                 return false;
959         }
960 
961         c->cs_rgb_yuv.y = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_Y);
962         c->cs_rgb_yuv.uv = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_UV);
963         c->cs_rgb_yuv.u = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_U);
964         c->cs_rgb_yuv.v = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_V);
965         if (!c->cs_rgb_yuv.y || !c->cs_rgb_yuv.uv ||
966             !c->cs_rgb_yuv.u || !c->cs_rgb_yuv.v) {
967                 debug_printf("Unable to create RGB-to-NV12 compute shader.\n");
968                 return false;
969         }
970 
971         return true;
972 }
973 
vl_compositor_cs_cleanup_shaders(struct vl_compositor * c)974 void vl_compositor_cs_cleanup_shaders(struct vl_compositor *c)
975 {
976         assert(c);
977 
978         if (c->cs_video_buffer)
979                 c->pipe->delete_compute_state(c->pipe, c->cs_video_buffer);
980         if (c->cs_weave_rgb)
981                 c->pipe->delete_compute_state(c->pipe, c->cs_weave_rgb);
982         if (c->cs_rgba)
983                 c->pipe->delete_compute_state(c->pipe, c->cs_rgba);
984         if (c->cs_yuv.weave.y)
985                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.y);
986         if (c->cs_yuv.weave.uv)
987                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.uv);
988         if (c->cs_yuv.progressive.y)
989                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.y);
990         if (c->cs_yuv.progressive.uv)
991                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.uv);
992         if (c->cs_yuv.progressive.u)
993                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.u);
994         if (c->cs_yuv.progressive.v)
995                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.v);
996         if (c->cs_rgb_yuv.y)
997                 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.y);
998         if (c->cs_rgb_yuv.uv)
999                 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.uv);
1000         if (c->cs_rgb_yuv.u)
1001                 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.u);
1002         if (c->cs_rgb_yuv.v)
1003                 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.v);
1004 }
1005