• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2019 Advanced Micro Devices, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  * Authors: James Zhu <james.zhu<@amd.com>
27  *
28  **************************************************************************/
29 
30 #include <assert.h>
31 
32 #include "nir/nir_builder.h"
33 #include "vl_compositor_cs.h"
34 
35 struct cs_viewport {
36    float scale_x;
37    float scale_y;
38    struct u_rect area;
39    float crop_x; /* src */
40    float crop_y;
41    int translate_x; /* dst */
42    int translate_y;
43    float sampler0_w;
44    float sampler0_h;
45    float clamp_x;
46    float clamp_y;
47    float chroma_clamp_x;
48    float chroma_clamp_y;
49    float chroma_offset_x;
50    float chroma_offset_y;
51 };
52 
53 struct cs_shader {
54    nir_builder b;
55    const char *name;
56    bool array;
57    unsigned num_samplers;
58    nir_variable *samplers[3];
59    nir_variable *image;
60    nir_def *params[8];
61    nir_def *fone;
62    nir_def *fzero;
63 };
64 
65 enum coords_flags {
66    COORDS_LUMA          = 0x0,
67    COORDS_CHROMA        = 0x1,
68    COORDS_CHROMA_OFFSET = 0x2,
69 };
70 
cs_create_shader(struct vl_compositor * c,struct cs_shader * s)71 static nir_def *cs_create_shader(struct vl_compositor *c, struct cs_shader *s)
72 {
73    /*
74       #version 450
75 
76       layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
77       layout (binding = 0) uniform sampler2DRect samplers[3]; // or sampler2DArray
78       layout (binding = 0) uniform image2D image;
79 
80       layout (std140, binding = 0) uniform ubo
81       {
82          vec4 csc_mat[3];      // params[0-2]
83          float luma_min;       // params[3].x
84          float luma_max;       // params[3].y
85          vec2 scale;           // params[3].zw
86          vec2 crop;            // params[4].xy
87          ivec2 translate;      // params[4].zw
88          vec2 sampler0_wh;     // params[5].xy
89          vec2 subsample_ratio; // params[5].zw
90          vec2 coord_clamp;     // params[6].xy
91          vec2 chroma_clamp;    // params[6].zw
92          vec2 chroma_offset;   // params[7].xy
93       };
94 
95       void main()
96       {
97          ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
98       }
99    */
100    enum glsl_sampler_dim sampler_dim = s->array ? GLSL_SAMPLER_DIM_2D : GLSL_SAMPLER_DIM_RECT;
101    const struct glsl_type *sampler_type =
102       glsl_sampler_type(sampler_dim, /*is_shadow*/ false, s->array, GLSL_TYPE_FLOAT);
103    const struct glsl_type *image_type =
104       glsl_image_type(GLSL_SAMPLER_DIM_2D, /*is_array*/ false, GLSL_TYPE_FLOAT);
105    const nir_shader_compiler_options *options =
106       c->pipe->screen->get_compiler_options(c->pipe->screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);
107 
108    s->b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "vl:%s", s->name);
109    nir_builder *b = &s->b;
110    b->shader->info.workgroup_size[0] = 8;
111    b->shader->info.workgroup_size[1] = 8;
112    b->shader->info.workgroup_size[2] = 1;
113    b->shader->info.num_ubos = 1;
114    b->shader->num_uniforms = ARRAY_SIZE(s->params);
115 
116    nir_def *zero = nir_imm_int(b, 0);
117    for (unsigned i = 0; i < b->shader->num_uniforms; ++i)
118       s->params[i] = nir_load_ubo(b, 4, 32, zero, nir_imm_int(b, i * 16), .align_mul = 4, .range = ~0);
119 
120    for (unsigned i = 0; i < s->num_samplers; ++i) {
121       s->samplers[i] = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "sampler");
122       s->samplers[i]->data.binding = i;
123       BITSET_SET(b->shader->info.textures_used, i);
124       BITSET_SET(b->shader->info.samplers_used, i);
125    }
126 
127    s->image = nir_variable_create(b->shader, nir_var_image, image_type, "image");
128    s->image->data.binding = 0;
129    BITSET_SET(b->shader->info.images_used, 0);
130 
131    s->fone = nir_imm_float(b, 1.0f);
132    s->fzero = nir_imm_float(b, 0.0f);
133 
134    nir_def *block_ids = nir_load_workgroup_id(b);
135    nir_def *local_ids = nir_load_local_invocation_id(b);
136    return nir_iadd(b, nir_imul(b, block_ids, nir_imm_ivec3(b, 8, 8, 1)), local_ids);
137 }
138 
cs_create_shader_state(struct vl_compositor * c,struct cs_shader * s)139 static void *cs_create_shader_state(struct vl_compositor *c, struct cs_shader *s)
140 {
141    c->pipe->screen->finalize_nir(c->pipe->screen, s->b.shader);
142 
143    struct pipe_compute_state state = {0};
144    state.ir_type = PIPE_SHADER_IR_NIR;
145    state.prog = s->b.shader;
146 
147    /* create compute shader */
148    return c->pipe->create_compute_state(c->pipe, &state);
149 }
150 
cs_translate(struct cs_shader * s,nir_def * src)151 static inline nir_def *cs_translate(struct cs_shader *s, nir_def *src)
152 {
153    /*
154       return src.xy + params[4].zw;
155    */
156    nir_builder *b = &s->b;
157    return nir_iadd(b, src, nir_channels(b, s->params[4], 0x3 << 2));
158 }
159 
cs_texture_offset(struct cs_shader * s,nir_def * src)160 static inline nir_def *cs_texture_offset(struct cs_shader *s, nir_def *src)
161 {
162    /*
163       return src.xy + 0.5;
164    */
165    nir_builder *b = &s->b;
166    return nir_fadd_imm(b, src, 0.5f);
167 }
168 
cs_chroma_subsampling(struct cs_shader * s,nir_def * src)169 static inline nir_def *cs_chroma_subsampling(struct cs_shader *s, nir_def *src)
170 {
171    /*
172       return src.xy * params[5].zw;
173    */
174    nir_builder *b = &s->b;
175    return nir_fmul(b, src, nir_channels(b, s->params[5], 0x3 << 2));
176 }
177 
cs_scale(struct cs_shader * s,nir_def * src)178 static inline nir_def *cs_scale(struct cs_shader *s, nir_def *src)
179 {
180    /*
181       return src.xy / params[3].zw;
182    */
183    nir_builder *b = &s->b;
184    return nir_fdiv(b, src, nir_channels(b, s->params[3], 0x3 << 2));
185 }
186 
cs_luma_key(struct cs_shader * s,nir_def * src)187 static inline nir_def *cs_luma_key(struct cs_shader *s, nir_def *src)
188 {
189    /*
190       bool luma_min = params[3].x >= src;
191       bool luma_max = params[3].y < src;
192       return float(luma_min || luma_max);
193    */
194    nir_builder *b = &s->b;
195    nir_def *luma_min = nir_fge(b, nir_channel(b, s->params[3], 0), src);
196    nir_def *luma_max = nir_flt(b, nir_channel(b, s->params[3], 1), src);
197    return nir_b2f32(b, nir_ior(b, luma_min, luma_max));
198 }
199 
cs_chroma_offset(struct cs_shader * s,nir_def * src,unsigned flags)200 static inline nir_def *cs_chroma_offset(struct cs_shader *s, nir_def *src, unsigned flags)
201 {
202    /*
203       vec2 offset = params[7].xy;
204       if (flags & COORDS_CHROMA)
205          return src.xy + offset;
206       return offset * -0.5 + src.xy;
207    */
208    nir_builder *b = &s->b;
209    nir_def *offset = nir_channels(b, s->params[7], 0x3);
210    if (flags & COORDS_CHROMA)
211       return nir_fadd(b, src, offset);
212    return nir_ffma_imm1(b, offset, -0.5f, src);
213 }
214 
cs_clamp(struct cs_shader * s,nir_def * src,unsigned flags)215 static inline nir_def *cs_clamp(struct cs_shader *s, nir_def *src, unsigned flags)
216 {
217    /*
218       vec2 coord_max;
219       if (flags & COORDS_CHROMA)
220          coord_max = params[6].zw;
221       else
222          coord_max = params[6].xy;
223       return min(src.xy, coord_max);
224    */
225    nir_builder *b = &s->b;
226    nir_component_mask_t mask = flags & COORDS_CHROMA ? 0x3 << 2 : 0x3;
227    return nir_fmin(b, src, nir_channels(b, s->params[6], mask));
228 }
229 
cs_normalize(struct cs_shader * s,nir_def * src,unsigned flags)230 static inline nir_def *cs_normalize(struct cs_shader *s, nir_def *src, unsigned flags)
231 {
232    /*
233       vec2 div = params[5].xy;
234       if (flags & COORDS_CHROMA)
235          div = cs_chroma_subsampling(div);
236       return src.xy / div;
237    */
238    nir_builder *b = &s->b;
239    nir_def *div = nir_channels(b, s->params[5], 0x3);
240    if (flags & COORDS_CHROMA)
241       div = cs_chroma_subsampling(s, div);
242    return nir_fdiv(b, src, div);
243 }
244 
cs_crop(struct cs_shader * s,nir_def * src,unsigned flags)245 static inline nir_def *cs_crop(struct cs_shader *s, nir_def *src, unsigned flags)
246 {
247    /*
248       vec2 crop = params[4].xy;
249       if (flags & COORDS_CHROMA)
250          crop = cs_chroma_subsampling(crop);
251       return src.xy + crop;
252    */
253    nir_builder *b = &s->b;
254    nir_def *crop = nir_channels(b, s->params[4], 0x3);
255    if (flags & COORDS_CHROMA)
256       crop = cs_chroma_subsampling(s, crop);
257    return nir_fadd(b, src, crop);
258 }
259 
cs_color_space_conversion(struct cs_shader * s,nir_def * src,unsigned comp)260 static inline nir_def *cs_color_space_conversion(struct cs_shader *s, nir_def *src, unsigned comp)
261 {
262    /*
263       return dot(src, params[comp]);
264    */
265    nir_builder *b = &s->b;
266    return nir_fdot4(b, src, s->params[comp]);
267 }
268 
cs_fetch_texel(struct cs_shader * s,nir_def * coords,unsigned sampler)269 static inline nir_def *cs_fetch_texel(struct cs_shader *s, nir_def *coords, unsigned sampler)
270 {
271    /*
272       return texture(samplers[sampler], s->array ? coords.xyz : coords.xy);
273    */
274    nir_builder *b = &s->b;
275    nir_deref_instr *tex_deref = nir_build_deref_var(b, s->samplers[sampler]);
276    nir_component_mask_t mask = s->array ? 0x7 : 0x3;
277    return nir_tex_deref(b, tex_deref, tex_deref, nir_channels(b, coords, mask));
278 }
279 
cs_image_load(struct cs_shader * s,nir_def * pos)280 static inline nir_def *cs_image_load(struct cs_shader *s, nir_def *pos)
281 {
282    /*
283       imageLoad(image, pos.xy);
284    */
285    nir_builder *b = &s->b;
286    nir_def *zero = nir_imm_int(b, 0);
287    nir_def *sample = nir_imm_int(b, 0);
288    pos = nir_pad_vector_imm_int(b, pos, 0, 4);
289    enum glsl_sampler_dim sampler_dim = s->array ? GLSL_SAMPLER_DIM_2D : GLSL_SAMPLER_DIM_RECT;
290    return nir_image_deref_load(b, 4, 32, &nir_build_deref_var(b, s->image)->def, pos, sample, zero,
291                                .image_dim = sampler_dim,
292                                .image_array = s->array);
293 }
294 
cs_image_store(struct cs_shader * s,nir_def * pos,nir_def * color)295 static inline void cs_image_store(struct cs_shader *s, nir_def *pos, nir_def *color)
296 {
297    /*
298       imageStore(image, pos.xy, color);
299    */
300    nir_builder *b = &s->b;
301    nir_def *zero = nir_imm_int(b, 0);
302    nir_def *undef32 = nir_undef(b, 1, 32);
303    pos = nir_pad_vector_imm_int(b, pos, 0, 4);
304    nir_image_deref_store(b, &nir_build_deref_var(b, s->image)->def, pos, undef32, color, zero);
305 }
306 
cs_tex_coords(struct cs_shader * s,nir_def * coords,unsigned flags)307 static nir_def *cs_tex_coords(struct cs_shader *s, nir_def *coords, unsigned flags)
308 {
309    nir_builder *b = &s->b;
310 
311    coords = nir_u2f32(b, coords);
312    coords = cs_texture_offset(s, coords);
313 
314    if (flags & COORDS_CHROMA_OFFSET)
315       coords = cs_chroma_offset(s, coords, flags);
316 
317    if (flags & COORDS_CHROMA)
318       coords = cs_chroma_subsampling(s, coords);
319 
320    coords = cs_scale(s, coords);
321    coords = cs_crop(s, coords, flags);
322    coords = cs_clamp(s, coords, flags);
323 
324    return coords;
325 }
326 
create_video_buffer_shader(struct vl_compositor * c)327 static void *create_video_buffer_shader(struct vl_compositor *c)
328 {
329    struct cs_shader s = {
330       .name = "video_buffer",
331       .num_samplers = 3,
332    };
333    nir_builder *b = &s.b;
334 
335    nir_def *ipos = cs_create_shader(c, &s);
336    nir_def *pos[2] = {
337       cs_tex_coords(&s, ipos, COORDS_LUMA),
338       cs_tex_coords(&s, ipos, COORDS_CHROMA | COORDS_CHROMA_OFFSET),
339    };
340 
341    nir_def *col[3];
342    for (unsigned i = 0; i < 3; ++i)
343       col[i] = cs_fetch_texel(&s, pos[MIN2(i, 1)], i);
344 
345    nir_def *alpha = cs_luma_key(&s, col[2]);
346 
347    nir_def *color = nir_vec4(b, col[0], col[1], col[2], s.fone);
348    for (unsigned i = 0; i < 3; ++i)
349       col[i] = cs_color_space_conversion(&s, color, i);
350 
351    color = nir_vec4(b, col[0], col[1], col[2], alpha);
352    cs_image_store(&s, cs_translate(&s, ipos), color);
353 
354    return cs_create_shader_state(c, &s);
355 }
356 
create_yuv_progressive_shader(struct vl_compositor * c,enum vl_compositor_plane plane)357 static void *create_yuv_progressive_shader(struct vl_compositor *c, enum vl_compositor_plane plane)
358 {
359    struct cs_shader s = {
360       .name = "yuv_progressive",
361       .num_samplers = 3,
362    };
363    nir_builder *b = &s.b;
364 
365    nir_def *ipos = cs_create_shader(c, &s);
366    nir_def *pos = cs_tex_coords(&s, ipos, plane == VL_COMPOSITOR_PLANE_Y ? COORDS_LUMA : COORDS_CHROMA);
367 
368    nir_def *color;
369    if (plane != VL_COMPOSITOR_PLANE_UV) {
370       unsigned c = 0;
371       if (plane == VL_COMPOSITOR_PLANE_U)
372          c = 1;
373       else if (plane == VL_COMPOSITOR_PLANE_V)
374          c = 2;
375       color = nir_channel(b, cs_fetch_texel(&s, pos, c), c);
376    } else {
377       nir_def *col1 = cs_fetch_texel(&s, pos, 1);
378       nir_def *col2 = cs_fetch_texel(&s, pos, 2);
379       color = nir_vec2(b, col1, col2);
380    }
381 
382    cs_image_store(&s, cs_translate(&s, ipos), color);
383 
384    return cs_create_shader_state(c, &s);
385 }
386 
create_rgb_yuv_shader(struct vl_compositor * c,enum vl_compositor_plane plane)387 static void *create_rgb_yuv_shader(struct vl_compositor *c, enum vl_compositor_plane plane)
388 {
389    struct cs_shader s = {
390       .name = "rgb_yuv",
391       .num_samplers = 1,
392    };
393    nir_builder *b = &s.b;
394 
395    nir_def *ipos = cs_create_shader(c, &s);
396    nir_def *color = NULL;
397 
398    if (plane == VL_COMPOSITOR_PLANE_Y) {
399       nir_def *pos = cs_tex_coords(&s, ipos, COORDS_LUMA);
400       color = cs_fetch_texel(&s, pos, 0);
401    } else {
402       /*
403          vec2 pos[4];
404          pos[0] = vec2(ipos);
405          pos[0] = cs_texture_offset(pos[0]);
406          pos[0] = cs_chroma_offset(pos[0], COORDS_LUMA);
407 
408          // Sample offset
409          pos[3] = pos[0] + vec2( 0.25, -0.25);
410          pos[2] = pos[0] + vec2(-0.25,  0.25);
411          pos[1] = pos[0] + vec2(-0.25, -0.25);
412          pos[0] = pos[0] + vec2( 0.25,  0.25);
413 
414          vec4 col[4];
415          for (uint i = 0; i < 4; ++i) {
416             pos[i] = cs_scale(pos[i]);
417             pos[i] = cs_crop(pos[i], COORDS_LUMA);
418             pos[i] = cs_clamp(pos[i], COORDS_LUMA);
419             col[i] = texture(samp[0], pos[i]);
420          }
421          color = (col[0] + col[1] + col[2] + col[3]) * 0.25;
422       */
423       nir_def *pos[4];
424       pos[0] = nir_u2f32(b, ipos);
425       pos[0] = cs_texture_offset(&s, pos[0]);
426       pos[0] = cs_chroma_offset(&s, pos[0], COORDS_LUMA);
427 
428       /* Sample offset */
429       nir_def *o_plus = nir_imm_float(b, 0.25f);
430       nir_def *o_minus = nir_imm_float(b, -0.25f);
431       pos[3] = nir_fadd(b, pos[0], nir_vec2(b, o_plus, o_minus));
432       pos[2] = nir_fadd(b, pos[0], nir_vec2(b, o_minus, o_plus));
433       pos[1] = nir_fadd(b, pos[0], nir_vec2(b, o_minus, o_minus));
434       pos[0] = nir_fadd(b, pos[0], nir_vec2(b, o_plus, o_plus));
435 
436       for (unsigned i = 0; i < 4; ++i) {
437          pos[i] = cs_scale(&s, pos[i]);
438          pos[i] = cs_crop(&s, pos[i], COORDS_LUMA);
439          pos[i] = cs_clamp(&s, pos[i], COORDS_LUMA);
440 
441          nir_def *c = cs_fetch_texel(&s, pos[i], 0);
442          color = color ? nir_fadd(b, color, c) : c;
443       }
444       color = nir_fmul_imm(b, color, 0.25f);
445    }
446 
447    color = nir_vector_insert_imm(b, color, s.fone, 3);
448 
449    if (plane != VL_COMPOSITOR_PLANE_UV) {
450       unsigned c = 0;
451       if (plane == VL_COMPOSITOR_PLANE_U)
452          c = 1;
453       else if (plane == VL_COMPOSITOR_PLANE_V)
454          c = 2;
455       color = cs_color_space_conversion(&s, color, c);
456    } else {
457       nir_def *col1 = cs_color_space_conversion(&s, color, 1);
458       nir_def *col2 = cs_color_space_conversion(&s, color, 2);
459       color = nir_vec2(b, col1, col2);
460    }
461 
462    cs_image_store(&s, cs_translate(&s, ipos), color);
463 
464    return cs_create_shader_state(c, &s);
465 }
466 
create_weave_shader(struct vl_compositor * c,bool rgb,bool y)467 static nir_def *create_weave_shader(struct vl_compositor *c, bool rgb, bool y)
468 {
469    struct cs_shader s = {
470       .name = rgb ? "weave" : y ? "yuv_weave_y" : "yuv_weave_uv",
471       .array = true,
472       .num_samplers = 3,
473    };
474    nir_builder *b = &s.b;
475 
476    nir_def *ipos = cs_create_shader(c, &s);
477 
478    /*
479       vec2 top_y = cs_texture_offset(vec2(ipos));
480       vec2 top_uv = rgb ? cs_chroma_offset(top_y, COORDS_CHROMA) : top_y;
481       top_uv = cs_chroma_subsampling(top_uv);
482       vec2 down_y = top_y;
483       vec2 down_uv = top_uv;
484 
485       top_y = cs_crop(cs_scale(top_y), COORDS_LUMA);
486       top_uv = cs_crop(cs_scale(top_uv), COORDS_CHROMA);
487       down_y = cs_crop(cs_scale(down_y), COORDS_LUMA);
488       down_uv = cs_crop(cs_scale(down_uv), COORDS_CHROMA);
489 
490       // Weave offset
491       top_y = top_y + vec2(0.0, 0.25);
492       top_uv = top_uv + vec2(0.0, 0.25);
493       down_y = down_y + vec2(0.0, -0.25);
494       down_uv = down_uv + vec2(0.0, -0.25);
495 
496       // Texture layer
497       vec3 tex_layer = vec3(top_y.y, top_uv.y, top_uv.y);
498       tex_layer = tex_layer + round(tex_layer) * -1.0;
499       tex_layer = abs(tex_layer) * 2.0;
500 
501       top_y = cs_clamp(top_y, COORDS_LUMA);
502       top_y = cs_normalize(top_y, COORDS_LUMA);
503       top_uv = cs_clamp(top_uv, COORDS_CHROMA);
504       top_uv = cs_normalize(top_uv, COORDS_CHROMA);
505       down_y = cs_clamp(down_y, COORDS_LUMA);
506       down_y = cs_normalize(down_y, COORDS_LUMA);
507       down_uv = cs_clamp(down_uv, COORDS_CHROMA);
508       down_uv = cs_normalize(down_uv, COORDS_CHROMA);
509 
510       vec4 top_col, down_col;
511       top_col.x = texture(samp[0], vec3(top_y, 0.0)).x;
512       top_col.y = texture(samp[1], vec3(top_uv, 0.0)).x;
513       top_col.z = texture(samp[2], vec3(top_uv, 0.0)).x;
514       top_col.w = 1.0;
515       down_col.x = texture(samp[0], vec3(down_y, 1.0)).x;
516       down_col.y = texture(samp[1], vec3(down_uv, 1.0)).x;
517       down_col.z = texture(samp[2], vec3(down_uv, 1.0)).x;
518       down_col.w = 1.0;
519 
520       vec4 color = mix(down_col, top_col, tex_layer);
521    */
522    nir_def *pos[4];
523    /* Top Y */
524    pos[0] = nir_u2f32(b, ipos);
525    pos[0] = cs_texture_offset(&s, pos[0]);
526    /* Top UV */
527    pos[1] = rgb ? cs_chroma_offset(&s, pos[0], COORDS_CHROMA) : pos[0];
528    pos[1] = cs_chroma_subsampling(&s, pos[1]);
529    /* Down Y */
530    pos[2] = pos[0];
531    /* Down UV */
532    pos[3] = pos[1];
533 
534    /* Weave offset */
535    nir_def *o_plus = nir_imm_vec2(b, 0.0f, 0.25f);
536    nir_def *o_minus = nir_imm_vec2(b, 0.0f, -0.25f);
537    for (unsigned i = 0; i < 4; ++i) {
538       pos[i] = cs_scale(&s, pos[i]);
539       pos[i] = cs_crop(&s, pos[i], i % 2 ? COORDS_CHROMA : COORDS_LUMA);
540       pos[i] = nir_fadd(b, pos[i], i < 2 ? o_plus : o_minus);
541    }
542 
543    /* Texture layer */
544    nir_def *tex_layer = nir_vec3(b,
545                                  nir_channel(b, pos[0], 1),
546                                  nir_channel(b, pos[1], 1),
547                                  nir_channel(b, pos[1], 1));
548    tex_layer = nir_fadd(b, tex_layer,
549                         nir_fneg(b, nir_fround_even(b, tex_layer)));
550    tex_layer = nir_fabs(b, tex_layer);
551    tex_layer = nir_fmul_imm(b, tex_layer, 2.0f);
552 
553    nir_def *col[6];
554    for (unsigned i = 0; i < 4; ++i) {
555       bool top = i < 2;
556       unsigned j = top ? 0 : 3;
557       unsigned flags = i % 2 ? COORDS_CHROMA : COORDS_LUMA;
558       pos[i] = cs_clamp(&s, pos[i], flags);
559       pos[i] = cs_normalize(&s, pos[i], flags);
560       pos[i] = nir_vector_insert_imm(b, pos[i],
561                                      top ? s.fzero : s.fone, 2);
562       if (flags == COORDS_LUMA) {
563          col[j] = cs_fetch_texel(&s, pos[i], 0);
564       } else {
565          col[j + 1] = cs_fetch_texel(&s, pos[i], 1);
566          col[j + 2] = cs_fetch_texel(&s, pos[i], 2);
567       }
568    }
569 
570    nir_def *color_top = nir_vec4(b, col[0], col[1], col[2], s.fone);
571    nir_def *color_down = nir_vec4(b, col[3], col[4], col[5], s.fone);
572    nir_def *color = nir_flrp(b, color_down, color_top, tex_layer);
573 
574    if (rgb) {
575       nir_def *alpha = cs_luma_key(&s, nir_channel(b, color, 2));
576       for (unsigned i = 0; i < 3; ++i)
577          col[i] = cs_color_space_conversion(&s, color, i);
578       color = nir_vec4(b, col[0], col[1], col[2], alpha);
579    } else if (y) {
580       color = nir_channel(b, color, 0);
581    } else {
582       nir_def *col1 = nir_channel(b, color, 1);
583       nir_def *col2 = nir_channel(b, color, 2);
584       color = nir_vec2(b, col1, col2);
585    }
586 
587    cs_image_store(&s, cs_translate(&s, ipos), color);
588 
589    return cs_create_shader_state(c, &s);
590 }
591 
create_rgba_shader(struct vl_compositor * c)592 static void *create_rgba_shader(struct vl_compositor *c)
593 {
594    struct cs_shader s = {
595       .name = "rgba",
596       .num_samplers = 1,
597    };
598    nir_builder *b = &s.b;
599 
600    nir_def *ipos = cs_create_shader(c, &s);
601    nir_def *pos = cs_tex_coords(&s, ipos, COORDS_LUMA);
602    nir_def *pos_out = cs_translate(&s, ipos);
603 
604    nir_def *col = cs_fetch_texel(&s, pos, 0);
605    nir_def *blend = cs_image_load(&s, pos_out);
606 
607    nir_def *color = nir_flrp(b, blend, col, nir_channel(b, col, 3));
608    color = nir_vector_insert_imm(b, color, s.fone, 3);
609 
610    cs_image_store(&s, pos_out, color);
611 
612    return cs_create_shader_state(c, &s);
613 }
614 
615 static void
cs_launch(struct vl_compositor * c,void * cs,const struct u_rect * draw_area)616 cs_launch(struct vl_compositor *c,
617           void                 *cs,
618           const struct u_rect  *draw_area)
619 {
620    struct pipe_context *ctx = c->pipe;
621    unsigned width, height;
622 
623    width = draw_area->x1 - draw_area->x0;
624    height = draw_area->y1 - draw_area->y0;
625 
626    /* Bind the image */
627    struct pipe_image_view image = {0};
628    image.resource = c->fb_state.cbufs[0]->texture;
629    image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE;
630    image.format = c->fb_state.cbufs[0]->texture->format;
631 
632    ctx->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, 0, &image);
633 
634    /* Bind compute shader */
635    ctx->bind_compute_state(ctx, cs);
636 
637    /* Dispatch compute */
638    struct pipe_grid_info info = {0};
639    info.block[0] = 8;
640    info.last_block[0] = width % info.block[0];
641    info.block[1] = 8;
642    info.last_block[1] = height % info.block[1];
643    info.block[2] = 1;
644    info.grid[0] = DIV_ROUND_UP(width, info.block[0]);
645    info.grid[1] = DIV_ROUND_UP(height, info.block[1]);
646    info.grid[2] = 1;
647 
648    ctx->launch_grid(ctx, &info);
649 
650    /* Make the result visible to all clients. */
651    ctx->memory_barrier(ctx, PIPE_BARRIER_ALL);
652 
653 }
654 
655 static inline struct u_rect
calc_drawn_area(struct vl_compositor_state * s,struct vl_compositor_layer * layer)656 calc_drawn_area(struct vl_compositor_state *s,
657                 struct vl_compositor_layer *layer)
658 {
659    struct vertex2f tl, br;
660    struct u_rect result;
661 
662    assert(s && layer);
663 
664    tl = layer->dst.tl;
665    br = layer->dst.br;
666 
667    /* Scale */
668    result.x0 = tl.x * layer->viewport.scale[0] + layer->viewport.translate[0];
669    result.y0 = tl.y * layer->viewport.scale[1] + layer->viewport.translate[1];
670    result.x1 = br.x * layer->viewport.scale[0] + layer->viewport.translate[0];
671    result.y1 = br.y * layer->viewport.scale[1] + layer->viewport.translate[1];
672 
673    /* Clip */
674    result.x0 = MAX2(result.x0, s->scissor.minx);
675    result.y0 = MAX2(result.y0, s->scissor.miny);
676    result.x1 = MIN2(result.x1, s->scissor.maxx);
677    result.y1 = MIN2(result.y1, s->scissor.maxy);
678    return result;
679 }
680 
681 static inline float
chroma_offset_x(unsigned location)682 chroma_offset_x(unsigned location)
683 {
684    if (location & VL_COMPOSITOR_LOCATION_HORIZONTAL_LEFT)
685       return 0.5f;
686    else
687       return 0.0f;
688 }
689 
690 static inline float
chroma_offset_y(unsigned location)691 chroma_offset_y(unsigned location)
692 {
693    if (location & VL_COMPOSITOR_LOCATION_VERTICAL_TOP)
694       return 0.5f;
695    else if (location & VL_COMPOSITOR_LOCATION_VERTICAL_BOTTOM)
696       return -0.5f;
697    else
698       return 0.0f;
699 }
700 
701 static bool
set_viewport(struct vl_compositor_state * s,struct cs_viewport * drawn,struct pipe_sampler_view ** samplers)702 set_viewport(struct vl_compositor_state *s,
703              struct cs_viewport         *drawn,
704              struct pipe_sampler_view **samplers)
705 {
706    struct pipe_transfer *buf_transfer;
707 
708    assert(s && drawn);
709 
710    void *ptr = pipe_buffer_map(s->pipe, s->shader_params,
711                                PIPE_MAP_WRITE | PIPE_MAP_DISCARD_WHOLE_RESOURCE,
712                                &buf_transfer);
713 
714    if (!ptr)
715      return false;
716 
717    memcpy(ptr, &s->csc_matrix, sizeof(vl_csc_matrix));
718 
719    float *ptr_float = (float *)ptr;
720    ptr_float += sizeof(vl_csc_matrix) / sizeof(float);
721    *ptr_float++ = s->luma_min;
722    *ptr_float++ = s->luma_max;
723    *ptr_float++ = drawn->scale_x;
724    *ptr_float++ = drawn->scale_y;
725    *ptr_float++ = drawn->crop_x;
726    *ptr_float++ = drawn->crop_y;
727 
728    int *ptr_int = (int *)ptr_float;
729    *ptr_int++ = drawn->translate_x;
730    *ptr_int++ = drawn->translate_y;
731 
732    ptr_float = (float *)ptr_int;
733    *ptr_float++ = drawn->sampler0_w;
734    *ptr_float++ = drawn->sampler0_h;
735 
736    /* compute_shader_video_buffer uses pixel coordinates based on the
737     * Y sampler dimensions. If U/V are using separate planes and are
738     * subsampled, we need to scale the coordinates */
739    if (samplers[1]) {
740       float h_ratio = samplers[1]->texture->width0 /
741                      (float) samplers[0]->texture->width0;
742       *ptr_float++ = h_ratio;
743       float v_ratio = samplers[1]->texture->height0 /
744                      (float) samplers[0]->texture->height0;
745       *ptr_float++ = v_ratio;
746    }
747    else {
748       *ptr_float++ = 1.0f;
749       *ptr_float++ = 1.0f;
750    }
751 
752 
753    *ptr_float++ = drawn->clamp_x;
754    *ptr_float++ = drawn->clamp_y;
755    *ptr_float++ = drawn->chroma_clamp_x;
756    *ptr_float++ = drawn->chroma_clamp_y;
757    *ptr_float++ = drawn->chroma_offset_x;
758    *ptr_float++ = drawn->chroma_offset_y;
759 
760    pipe_buffer_unmap(s->pipe, buf_transfer);
761 
762    return true;
763 }
764 
765 static void
draw_layers(struct vl_compositor * c,struct vl_compositor_state * s,struct u_rect * dirty)766 draw_layers(struct vl_compositor       *c,
767             struct vl_compositor_state *s,
768             struct u_rect              *dirty)
769 {
770    unsigned i;
771 
772    assert(c);
773 
774    for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
775       if (s->used_layers & (1 << i)) {
776          struct vl_compositor_layer *layer = &s->layers[i];
777          struct pipe_sampler_view **samplers = &layer->sampler_views[0];
778          unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
779          struct pipe_sampler_view *sampler1 = samplers[1] ? samplers[1] : samplers[0];
780          struct cs_viewport drawn;
781 
782          drawn.area = calc_drawn_area(s, layer);
783          drawn.scale_x = layer->viewport.scale[0] /
784             ((float)layer->sampler_views[0]->texture->width0 *
785              (layer->src.br.x - layer->src.tl.x));
786          drawn.scale_y  = layer->viewport.scale[1] /
787             ((float)layer->sampler_views[0]->texture->height0 *
788              (layer->src.br.y - layer->src.tl.y));
789          drawn.crop_x = layer->src.tl.x * layer->sampler_views[0]->texture->width0;
790          drawn.translate_x = layer->viewport.translate[0];
791          drawn.crop_y = layer->src.tl.y * layer->sampler_views[0]->texture->height0;
792          drawn.translate_y = layer->viewport.translate[1];
793          drawn.sampler0_w = (float)layer->sampler_views[0]->texture->width0;
794          drawn.sampler0_h = (float)layer->sampler_views[0]->texture->height0;
795          drawn.clamp_x = (float)samplers[0]->texture->width0 * layer->src.br.x - 0.5;
796          drawn.clamp_y = (float)samplers[0]->texture->height0 * layer->src.br.y - 0.5;
797          drawn.chroma_clamp_x = (float)sampler1->texture->width0 * layer->src.br.x - 0.5;
798          drawn.chroma_clamp_y = (float)sampler1->texture->height0 * layer->src.br.y - 0.5;
799          drawn.chroma_offset_x = chroma_offset_x(s->chroma_location);
800          drawn.chroma_offset_y = chroma_offset_y(s->chroma_location);
801          set_viewport(s, &drawn, samplers);
802 
803          c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
804                         num_sampler_views, layer->samplers);
805          c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0,
806                         num_sampler_views, 0, false, samplers);
807 
808          cs_launch(c, layer->cs, &(drawn.area));
809 
810          /* Unbind. */
811          c->pipe->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 0, 1, NULL);
812          c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, false, NULL);
813          c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0, 0,
814                         num_sampler_views, false, NULL);
815          c->pipe->bind_compute_state(c->pipe, NULL);
816          c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
817                         num_sampler_views, NULL);
818 
819          if (dirty) {
820             struct u_rect drawn = calc_drawn_area(s, layer);
821             dirty->x0 = MIN2(drawn.x0, dirty->x0);
822             dirty->y0 = MIN2(drawn.y0, dirty->y0);
823             dirty->x1 = MAX2(drawn.x1, dirty->x1);
824             dirty->y1 = MAX2(drawn.y1, dirty->y1);
825          }
826       }
827    }
828 }
829 
830 void
vl_compositor_cs_render(struct vl_compositor_state * s,struct vl_compositor * c,struct pipe_surface * dst_surface,struct u_rect * dirty_area,bool clear_dirty)831 vl_compositor_cs_render(struct vl_compositor_state *s,
832                         struct vl_compositor       *c,
833                         struct pipe_surface        *dst_surface,
834                         struct u_rect              *dirty_area,
835                         bool                        clear_dirty)
836 {
837    assert(c && s);
838    assert(dst_surface);
839 
840    c->fb_state.width = dst_surface->width;
841    c->fb_state.height = dst_surface->height;
842    c->fb_state.cbufs[0] = dst_surface;
843 
844    if (!s->scissor_valid) {
845       s->scissor.minx = 0;
846       s->scissor.miny = 0;
847       s->scissor.maxx = dst_surface->width;
848       s->scissor.maxy = dst_surface->height;
849    }
850 
851    if (clear_dirty && dirty_area &&
852        (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) {
853 
854       c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color,
855                        0, 0, dst_surface->width, dst_surface->height, false);
856       dirty_area->x0 = dirty_area->y0 = VL_COMPOSITOR_MAX_DIRTY;
857       dirty_area->x1 = dirty_area->y1 = VL_COMPOSITOR_MIN_DIRTY;
858    }
859 
860    pipe_set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, s->shader_params);
861 
862    draw_layers(c, s, dirty_area);
863 }
864 
vl_compositor_cs_init_shaders(struct vl_compositor * c)865 bool vl_compositor_cs_init_shaders(struct vl_compositor *c)
866 {
867         assert(c);
868 
869         c->cs_video_buffer = create_video_buffer_shader(c);
870         if (!c->cs_video_buffer) {
871                 debug_printf("Unable to create video_buffer compute shader.\n");
872                 return false;
873         }
874 
875         c->cs_weave_rgb = create_weave_shader(c, true, false);
876         if (!c->cs_weave_rgb) {
877                 debug_printf("Unable to create weave_rgb compute shader.\n");
878                 return false;
879         }
880 
881         c->cs_rgba = create_rgba_shader(c);
882         if (!c->cs_rgba) {
883                 debug_printf("Unable to create rgba compute shader.\n");
884                 return false;
885         }
886 
887         c->cs_yuv.weave.y = create_weave_shader(c, false, true);
888         c->cs_yuv.weave.uv = create_weave_shader(c, false, false);
889         c->cs_yuv.progressive.y = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_Y);
890         c->cs_yuv.progressive.uv = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_UV);
891         c->cs_yuv.progressive.u = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_U);
892         c->cs_yuv.progressive.v = create_yuv_progressive_shader(c, VL_COMPOSITOR_PLANE_V);
893         if (!c->cs_yuv.weave.y || !c->cs_yuv.weave.uv) {
894                 debug_printf("Unable to create YCbCr i-to-YCbCr p deint compute shader.\n");
895                 return false;
896         }
897         if (!c->cs_yuv.progressive.y || !c->cs_yuv.progressive.uv ||
898             !c->cs_yuv.progressive.u || !c->cs_yuv.progressive.v) {
899                 debug_printf("Unable to create YCbCr p-to-NV12 compute shader.\n");
900                 return false;
901         }
902 
903         c->cs_rgb_yuv.y = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_Y);
904         c->cs_rgb_yuv.uv = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_UV);
905         c->cs_rgb_yuv.u = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_U);
906         c->cs_rgb_yuv.v = create_rgb_yuv_shader(c, VL_COMPOSITOR_PLANE_V);
907         if (!c->cs_rgb_yuv.y || !c->cs_rgb_yuv.uv ||
908             !c->cs_rgb_yuv.u || !c->cs_rgb_yuv.v) {
909                 debug_printf("Unable to create RGB-to-NV12 compute shader.\n");
910                 return false;
911         }
912 
913         return true;
914 }
915 
vl_compositor_cs_cleanup_shaders(struct vl_compositor * c)916 void vl_compositor_cs_cleanup_shaders(struct vl_compositor *c)
917 {
918         assert(c);
919 
920         if (c->cs_video_buffer)
921                 c->pipe->delete_compute_state(c->pipe, c->cs_video_buffer);
922         if (c->cs_weave_rgb)
923                 c->pipe->delete_compute_state(c->pipe, c->cs_weave_rgb);
924         if (c->cs_rgba)
925                 c->pipe->delete_compute_state(c->pipe, c->cs_rgba);
926         if (c->cs_yuv.weave.y)
927                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.y);
928         if (c->cs_yuv.weave.uv)
929                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.uv);
930         if (c->cs_yuv.progressive.y)
931                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.y);
932         if (c->cs_yuv.progressive.uv)
933                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.uv);
934         if (c->cs_yuv.progressive.u)
935                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.u);
936         if (c->cs_yuv.progressive.v)
937                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.progressive.v);
938         if (c->cs_rgb_yuv.y)
939                 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.y);
940         if (c->cs_rgb_yuv.uv)
941                 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.uv);
942         if (c->cs_rgb_yuv.u)
943                 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.u);
944         if (c->cs_rgb_yuv.v)
945                 c->pipe->delete_compute_state(c->pipe, c->cs_rgb_yuv.v);
946 }
947