• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2016 Nayan Deshmukh.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include <stdio.h>
29 
30 #include "pipe/p_context.h"
31 
32 #include "tgsi/tgsi_ureg.h"
33 
34 #include "util/u_draw.h"
35 #include "util/u_memory.h"
36 #include "util/u_math.h"
37 #include "util/u_rect.h"
38 #include "util/u_upload_mgr.h"
39 
40 #include "vl_types.h"
41 #include "vl_vertex_buffers.h"
42 #include "vl_bicubic_filter.h"
43 
/*
 * Semantic indices used for the vertex shader outputs.  Both are 0: the
 * position is declared with TGSI_SEMANTIC_POSITION and the texture
 * coordinate with TGSI_SEMANTIC_GENERIC, so the same index is used under
 * two different semantic names.
 */
enum VS_OUTPUT {
   VS_O_VPOS = 0,   /* index of the position output */
   VS_O_VTEX = 0    /* index of the generic (texture coordinate) output */
};
49 
50 static void *
create_vert_shader(struct vl_bicubic_filter * filter)51 create_vert_shader(struct vl_bicubic_filter *filter)
52 {
53    struct ureg_program *shader;
54    struct ureg_src i_vpos;
55    struct ureg_dst o_vpos, o_vtex;
56 
57    shader = ureg_create(PIPE_SHADER_VERTEX);
58    if (!shader)
59       return NULL;
60 
61    i_vpos = ureg_DECL_vs_input(shader, 0);
62    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
63    o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX);
64 
65    ureg_MOV(shader, o_vpos, i_vpos);
66    ureg_MOV(shader, o_vtex, i_vpos);
67 
68    ureg_END(shader);
69 
70    return ureg_create_shader_and_destroy(shader, filter->pipe);
71 }
72 
73 static void
create_frag_shader_cubic_interpolater(struct ureg_program * shader,struct ureg_src tex_a,struct ureg_src tex_b,struct ureg_src tex_c,struct ureg_src tex_d,struct ureg_src t,struct ureg_dst o_fragment)74 create_frag_shader_cubic_interpolater(struct ureg_program *shader, struct ureg_src tex_a,
75                                       struct ureg_src tex_b, struct ureg_src tex_c,
76                                       struct ureg_src tex_d, struct ureg_src t,
77                                       struct ureg_dst o_fragment)
78 {
79    struct ureg_dst temp[11];
80    struct ureg_dst t_2;
81    unsigned i;
82 
83    for(i = 0; i < 11; ++i)
84        temp[i] = ureg_DECL_temporary(shader);
85    t_2 = ureg_DECL_temporary(shader);
86 
87    /*
88     * |temp[0]|   |  0  2  0  0 |  |tex_a|
89     * |temp[1]| = | -1  0  1  0 |* |tex_b|
90     * |temp[2]|   |  2 -5  4 -1 |  |tex_c|
91     * |temp[3]|   | -1  3 -3  1 |  |tex_d|
92     */
93    ureg_MUL(shader, temp[0], tex_b, ureg_imm1f(shader, 2.0f));
94 
95    ureg_MUL(shader, temp[1], tex_a, ureg_imm1f(shader, -1.0f));
96    ureg_MAD(shader, temp[1], tex_c, ureg_imm1f(shader, 1.0f),
97             ureg_src(temp[1]));
98 
99    ureg_MUL(shader, temp[2], tex_a, ureg_imm1f(shader, 2.0f));
100    ureg_MAD(shader, temp[2], tex_b, ureg_imm1f(shader, -5.0f),
101             ureg_src(temp[2]));
102    ureg_MAD(shader, temp[2], tex_c, ureg_imm1f(shader, 4.0f),
103             ureg_src(temp[2]));
104    ureg_MAD(shader, temp[2], tex_d, ureg_imm1f(shader, -1.0f),
105              ureg_src(temp[2]));
106 
107    ureg_MUL(shader, temp[3], tex_a, ureg_imm1f(shader, -1.0f));
108    ureg_MAD(shader, temp[3], tex_b, ureg_imm1f(shader, 3.0f),
109             ureg_src(temp[3]));
110    ureg_MAD(shader, temp[3], tex_c, ureg_imm1f(shader, -3.0f),
111             ureg_src(temp[3]));
112    ureg_MAD(shader, temp[3], tex_d, ureg_imm1f(shader, 1.0f),
113             ureg_src(temp[3]));
114 
115    /*
116     * t_2 = t*t
117     * o_fragment = 0.5*|1  t  t^2  t^3|*|temp[0]|
118     *                                   |temp[1]|
119     *                                   |temp[2]|
120     *                                   |temp[3]|
121     */
122 
123    ureg_MUL(shader, t_2, t, t);
124    ureg_MUL(shader, temp[4], ureg_src(t_2), t);
125 
126    ureg_MUL(shader, temp[4], ureg_src(temp[4]),
127             ureg_src(temp[3]));
128    ureg_MUL(shader, temp[5], ureg_src(t_2),
129             ureg_src(temp[2]));
130    ureg_MUL(shader, temp[6], t,
131             ureg_src(temp[1]));
132    ureg_MUL(shader, temp[7], ureg_imm1f(shader, 1.0f),
133             ureg_src(temp[0]));
134    ureg_ADD(shader, temp[8], ureg_src(temp[4]),
135             ureg_src(temp[5]));
136    ureg_ADD(shader, temp[9], ureg_src(temp[6]),
137             ureg_src(temp[7]));
138 
139    ureg_ADD(shader, temp[10], ureg_src(temp[8]),
140             ureg_src(temp[9]));
141    ureg_MUL(shader, o_fragment, ureg_src(temp[10]),
142             ureg_imm1f(shader, 0.5f));
143 
144 
145    for(i = 0; i < 11; ++i)
146        ureg_release_temporary(shader, temp[i]);
147    ureg_release_temporary(shader, t_2);
148 }
149 
150 static void *
create_frag_shader(struct vl_bicubic_filter * filter,unsigned video_width,unsigned video_height,struct vertex2f * offsets)151 create_frag_shader(struct vl_bicubic_filter *filter, unsigned video_width,
152                    unsigned video_height, struct vertex2f *offsets)
153 {
154    struct pipe_screen *screen = filter->pipe->screen;
155    struct ureg_program *shader;
156    struct ureg_src i_vtex, vtex;
157    struct ureg_src sampler;
158    struct ureg_src half_pixel;
159    struct ureg_dst t_array[23];
160    struct ureg_dst o_fragment;
161    struct ureg_dst t;
162    unsigned i;
163 
164    if (screen->get_shader_param(
165       screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_TEMPS) < 23) {
166 
167       return NULL;
168    }
169 
170    shader = ureg_create(PIPE_SHADER_FRAGMENT);
171    if (!shader) {
172       return NULL;
173    }
174 
175    i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
176    sampler = ureg_DECL_sampler(shader, 0);
177    ureg_DECL_sampler_view(shader, 0, TGSI_TEXTURE_2D,
178                           TGSI_RETURN_TYPE_FLOAT,
179                           TGSI_RETURN_TYPE_FLOAT,
180                           TGSI_RETURN_TYPE_FLOAT,
181                           TGSI_RETURN_TYPE_FLOAT);
182 
183    for (i = 0; i < 23; ++i)
184       t_array[i] = ureg_DECL_temporary(shader);
185    t = ureg_DECL_temporary(shader);
186 
187    half_pixel = ureg_DECL_constant(shader, 0);
188    o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
189 
190    /*
191     * temp = (i_vtex - (0.5/dst_size)) * i_size)
192     * t = frac(temp)
193     * vtex = floor(i_vtex)/i_size
194     */
195    ureg_ADD(shader, ureg_writemask(t_array[21], TGSI_WRITEMASK_XY),
196             i_vtex, ureg_negate(half_pixel));
197    ureg_MUL(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
198             ureg_src(t_array[21]), ureg_imm2f(shader, video_width, video_height));
199    ureg_FRC(shader, ureg_writemask(t, TGSI_WRITEMASK_XY),
200             ureg_src(t_array[22]));
201 
202    ureg_FLR(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
203             ureg_src(t_array[22]));
204 
205    ureg_MAD(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
206             ureg_src(t_array[22]),
207             ureg_imm2f(shader, 1.0f / video_width, 1.0f / video_height),
208             half_pixel);
209 
210    /*
211     * t_array[0..*] = vtex + offset[0..*]
212     * t_array[0..*] = tex(t_array[0..*], sampler)
213     * t_array[16+i] = cubic_interpolate(t_array[4*i..4*i+3], t_x)
214     * o_fragment = cubic_interpolate(t_array[16..19], t_y)
215     */
216    vtex = ureg_src(t_array[22]);
217    for (i = 0; i < 16; ++i) {
218         ureg_ADD(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_XY),
219                   vtex, ureg_imm2f(shader, offsets[i].x, offsets[i].y));
220         ureg_MOV(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_ZW),
221                   ureg_imm1f(shader, 0.0f));
222    }
223 
224    for (i = 0; i < 16; ++i) {
225       ureg_TEX(shader, t_array[i], TGSI_TEXTURE_2D, ureg_src(t_array[i]), sampler);
226    }
227 
228    for(i = 0; i < 4; ++i)
229       create_frag_shader_cubic_interpolater(shader, ureg_src(t_array[4*i]),
230               ureg_src(t_array[4*i+1]), ureg_src(t_array[4*i+2]), ureg_src(t_array[4*i+3]),
231               ureg_scalar(ureg_src(t), TGSI_SWIZZLE_X), t_array[16+i]);
232 
233    create_frag_shader_cubic_interpolater(shader, ureg_src(t_array[16]),
234             ureg_src(t_array[17]), ureg_src(t_array[18]), ureg_src(t_array[19]),
235             ureg_scalar(ureg_src(t), TGSI_SWIZZLE_Y), o_fragment);
236 
237    for(i = 0; i < 23; ++i)
238        ureg_release_temporary(shader, t_array[i]);
239    ureg_release_temporary(shader, t);
240 
241    ureg_END(shader);
242 
243    return ureg_create_shader_and_destroy(shader, filter->pipe);
244 }
245 
246 bool
vl_bicubic_filter_init(struct vl_bicubic_filter * filter,struct pipe_context * pipe,unsigned width,unsigned height)247 vl_bicubic_filter_init(struct vl_bicubic_filter *filter, struct pipe_context *pipe,
248                       unsigned width, unsigned height)
249 {
250    struct pipe_rasterizer_state rs_state;
251    struct pipe_blend_state blend;
252    struct vertex2f offsets[16];
253    struct pipe_sampler_state sampler;
254    struct pipe_vertex_element ve;
255    unsigned i;
256 
257    assert(filter && pipe);
258    assert(width && height);
259 
260    memset(filter, 0, sizeof(*filter));
261    filter->pipe = pipe;
262 
263    memset(&rs_state, 0, sizeof(rs_state));
264    rs_state.half_pixel_center = true;
265    rs_state.bottom_edge_rule = true;
266    rs_state.depth_clip_near = 1;
267    rs_state.depth_clip_far = 1;
268 
269    filter->rs_state = pipe->create_rasterizer_state(pipe, &rs_state);
270    if (!filter->rs_state)
271       goto error_rs_state;
272 
273    memset(&blend, 0, sizeof blend);
274    blend.rt[0].rgb_func = PIPE_BLEND_ADD;
275    blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
276    blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
277    blend.rt[0].alpha_func = PIPE_BLEND_ADD;
278    blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
279    blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
280    blend.logicop_func = PIPE_LOGICOP_CLEAR;
281    blend.rt[0].colormask = PIPE_MASK_RGBA;
282    filter->blend = pipe->create_blend_state(pipe, &blend);
283    if (!filter->blend)
284       goto error_blend;
285 
286    memset(&sampler, 0, sizeof(sampler));
287    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
288    sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
289    sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
290    sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
291    sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
292    sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
293    sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
294    sampler.compare_func = PIPE_FUNC_ALWAYS;
295    sampler.normalized_coords = 1;
296    filter->sampler = pipe->create_sampler_state(pipe, &sampler);
297    if (!filter->sampler)
298       goto error_sampler;
299 
300    filter->quad = vl_vb_upload_quads(pipe);
301    if(!filter->quad.buffer.resource)
302       goto error_quad;
303 
304    memset(&ve, 0, sizeof(ve));
305    ve.src_offset = 0;
306    ve.instance_divisor = 0;
307    ve.vertex_buffer_index = 0;
308    ve.src_format = PIPE_FORMAT_R32G32_FLOAT;
309    filter->ves = pipe->create_vertex_elements_state(pipe, 1, &ve);
310    if (!filter->ves)
311       goto error_ves;
312 
313    offsets[0].x = -1.0f; offsets[0].y = -1.0f;
314    offsets[1].x = 0.0f; offsets[1].y = -1.0f;
315    offsets[2].x = 1.0f; offsets[2].y = -1.0f;
316    offsets[3].x = 2.0f; offsets[3].y = -1.0f;
317 
318    offsets[4].x = -1.0f; offsets[4].y = 0.0f;
319    offsets[5].x = 0.0f; offsets[5].y = 0.0f;
320    offsets[6].x = 1.0f; offsets[6].y = 0.0f;
321    offsets[7].x = 2.0f; offsets[7].y = 0.0f;
322 
323    offsets[8].x = -1.0f; offsets[8].y = 1.0f;
324    offsets[9].x = 0.0f; offsets[9].y = 1.0f;
325    offsets[10].x = 1.0f; offsets[10].y = 1.0f;
326    offsets[11].x = 2.0f; offsets[11].y = 1.0f;
327 
328    offsets[12].x = -1.0f; offsets[12].y = 2.0f;
329    offsets[13].x = 0.0f; offsets[13].y = 2.0f;
330    offsets[14].x = 1.0f; offsets[14].y = 2.0f;
331    offsets[15].x = 2.0f; offsets[15].y = 2.0f;
332 
333    for (i = 0; i < 16; ++i) {
334       offsets[i].x /= width;
335       offsets[i].y /= height;
336    }
337 
338    filter->vs = create_vert_shader(filter);
339    if (!filter->vs)
340       goto error_vs;
341 
342    filter->fs = create_frag_shader(filter, width, height, offsets);
343    if (!filter->fs)
344       goto error_fs;
345 
346    return true;
347 
348 error_fs:
349    pipe->delete_vs_state(pipe, filter->vs);
350 
351 error_vs:
352    pipe->delete_vertex_elements_state(pipe, filter->ves);
353 
354 error_ves:
355    pipe_resource_reference(&filter->quad.buffer.resource, NULL);
356 
357 error_quad:
358    pipe->delete_sampler_state(pipe, filter->sampler);
359 
360 error_sampler:
361    pipe->delete_blend_state(pipe, filter->blend);
362 
363 error_blend:
364    pipe->delete_rasterizer_state(pipe, filter->rs_state);
365 
366 error_rs_state:
367    return false;
368 }
369 
370 void
vl_bicubic_filter_cleanup(struct vl_bicubic_filter * filter)371 vl_bicubic_filter_cleanup(struct vl_bicubic_filter *filter)
372 {
373    assert(filter);
374 
375    filter->pipe->delete_sampler_state(filter->pipe, filter->sampler);
376    filter->pipe->delete_blend_state(filter->pipe, filter->blend);
377    filter->pipe->delete_rasterizer_state(filter->pipe, filter->rs_state);
378    filter->pipe->delete_vertex_elements_state(filter->pipe, filter->ves);
379    pipe_resource_reference(&filter->quad.buffer.resource, NULL);
380 
381    filter->pipe->delete_vs_state(filter->pipe, filter->vs);
382    filter->pipe->delete_fs_state(filter->pipe, filter->fs);
383 }
384 
385 void
vl_bicubic_filter_render(struct vl_bicubic_filter * filter,struct pipe_sampler_view * src,struct pipe_surface * dst,struct u_rect * dst_area,struct u_rect * dst_clip)386 vl_bicubic_filter_render(struct vl_bicubic_filter *filter,
387                         struct pipe_sampler_view *src,
388                         struct pipe_surface *dst,
389                         struct u_rect *dst_area,
390                         struct u_rect *dst_clip)
391 {
392    struct pipe_viewport_state viewport;
393    struct pipe_framebuffer_state fb_state;
394    struct pipe_scissor_state scissor;
395    union pipe_color_union clear_color;
396 
397    assert(filter && src && dst);
398 
399    if (dst_clip) {
400       scissor.minx = dst_clip->x0;
401       scissor.miny = dst_clip->y0;
402       scissor.maxx = dst_clip->x1;
403       scissor.maxy = dst_clip->y1;
404    } else {
405       scissor.minx = 0;
406       scissor.miny = 0;
407       scissor.maxx = dst->width;
408       scissor.maxy = dst->height;
409    }
410 
411    clear_color.f[0] = clear_color.f[1] = 0.0f;
412    clear_color.f[2] = clear_color.f[3] = 0.0f;
413 
414    memset(&viewport, 0, sizeof(viewport));
415    if(dst_area){
416       viewport.scale[0] = dst_area->x1 - dst_area->x0;
417       viewport.scale[1] = dst_area->y1 - dst_area->y0;
418       viewport.translate[0] = dst_area->x0;
419       viewport.translate[1] = dst_area->y0;
420    } else {
421       viewport.scale[0] = dst->width;
422       viewport.scale[1] = dst->height;
423    }
424    viewport.scale[2] = 1;
425    viewport.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X;
426    viewport.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y;
427    viewport.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z;
428    viewport.swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W;
429 
430    struct pipe_constant_buffer cb = {0};
431    float *ptr = NULL;
432 
433    u_upload_alloc(filter->pipe->const_uploader, 0, 2 * sizeof(float), 256,
434                   &cb.buffer_offset, &cb.buffer, (void**)&ptr);
435    cb.buffer_size = 2 * sizeof(float);
436 
437    if (ptr) {
438       ptr[0] = 0.5f/viewport.scale[0];
439       ptr[1] = 0.5f/viewport.scale[1];
440    }
441    u_upload_unmap(filter->pipe->const_uploader);
442 
443    memset(&fb_state, 0, sizeof(fb_state));
444    fb_state.width = dst->width;
445    fb_state.height = dst->height;
446    fb_state.nr_cbufs = 1;
447    fb_state.cbufs[0] = dst;
448 
449    filter->pipe->set_scissor_states(filter->pipe, 0, 1, &scissor);
450    filter->pipe->clear_render_target(filter->pipe, dst, &clear_color,
451                                      0, 0, dst->width, dst->height, false);
452    filter->pipe->set_constant_buffer(filter->pipe, PIPE_SHADER_FRAGMENT,
453                                      0, false, &cb);
454    filter->pipe->bind_rasterizer_state(filter->pipe, filter->rs_state);
455    filter->pipe->bind_blend_state(filter->pipe, filter->blend);
456    filter->pipe->bind_sampler_states(filter->pipe, PIPE_SHADER_FRAGMENT,
457                                      0, 1, &filter->sampler);
458    filter->pipe->set_sampler_views(filter->pipe, PIPE_SHADER_FRAGMENT,
459                                    0, 1, 0, false, &src);
460    filter->pipe->bind_vs_state(filter->pipe, filter->vs);
461    filter->pipe->bind_fs_state(filter->pipe, filter->fs);
462    filter->pipe->set_framebuffer_state(filter->pipe, &fb_state);
463    filter->pipe->set_viewport_states(filter->pipe, 0, 1, &viewport);
464    filter->pipe->set_vertex_buffers(filter->pipe, 0, 1, 0, false, &filter->quad);
465    filter->pipe->bind_vertex_elements_state(filter->pipe, filter->ves);
466 
467    util_draw_arrays(filter->pipe, PIPE_PRIM_QUADS, 0, 4);
468 }
469