• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  *
26  **************************************************************************/
27 
28 
29 /**
30  * @file
31  * YUV pixel format manipulation.
32  *
33  * @author Jose Fonseca <jfonseca@vmware.com>
34  */
35 
36 
37 #include "util/format/u_format.h"
38 #include "util/u_cpu_detect.h"
39 
40 #include "lp_bld_arit.h"
41 #include "lp_bld_type.h"
42 #include "lp_bld_const.h"
43 #include "lp_bld_conv.h"
44 #include "lp_bld_gather.h"
45 #include "lp_bld_format.h"
46 #include "lp_bld_init.h"
47 #include "lp_bld_logic.h"
48 
49 /**
50  * Extract Y, U, V channels from packed UYVY.
51  * @param packed  is a <n x i32> vector with the packed UYVY blocks
52  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
53  */
54 static void
uyvy_to_yuv_soa(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i,LLVMValueRef * y,LLVMValueRef * u,LLVMValueRef * v)55 uyvy_to_yuv_soa(struct gallivm_state *gallivm,
56                 unsigned n,
57                 LLVMValueRef packed,
58                 LLVMValueRef i,
59                 LLVMValueRef *y,
60                 LLVMValueRef *u,
61                 LLVMValueRef *v)
62 {
63    LLVMBuilderRef builder = gallivm->builder;
64    struct lp_type type;
65    LLVMValueRef mask;
66 
67    memset(&type, 0, sizeof type);
68    type.width = 32;
69    type.length = n;
70 
71    assert(lp_check_value(type, packed));
72    assert(lp_check_value(type, i));
73 
74    /*
75     * Little endian:
76     * y = (uyvy >> (16*i + 8)) & 0xff
77     * u = (uyvy        ) & 0xff
78     * v = (uyvy >> 16  ) & 0xff
79     *
80     * Big endian:
81     * y = (uyvy >> (-16*i + 16)) & 0xff
82     * u = (uyvy >> 24) & 0xff
83     * v = (uyvy >>  8) & 0xff
84     */
85 
86 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
87    /*
88     * Avoid shift with per-element count.
89     * No support on x86, gets translated to roughly 5 instructions
90     * per element. Didn't measure performance but cuts shader size
91     * by quite a bit (less difference if cpu has no sse4.1 support).
92     */
93    if (util_get_cpu_caps()->has_sse2 && n > 1) {
94       LLVMValueRef sel, tmp, tmp2;
95       struct lp_build_context bld32;
96 
97       lp_build_context_init(&bld32, gallivm, type);
98 
99       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
100       tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
101       sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
102       *y = lp_build_select(&bld32, sel, tmp, tmp2);
103    } else
104 #endif
105    {
106       LLVMValueRef shift;
107 #if UTIL_ARCH_LITTLE_ENDIAN
108       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
109       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
110 #else
111       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
112       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 16), "");
113 #endif
114       *y = LLVMBuildLShr(builder, packed, shift, "");
115    }
116 
117 #if UTIL_ARCH_LITTLE_ENDIAN
118    *u = packed;
119    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
120 #else
121    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
122    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
123 #endif
124 
125    mask = lp_build_const_int_vec(gallivm, type, 0xff);
126 
127    *y = LLVMBuildAnd(builder, *y, mask, "y");
128    *u = LLVMBuildAnd(builder, *u, mask, "u");
129    *v = LLVMBuildAnd(builder, *v, mask, "v");
130 }
131 
132 
133 /**
134  * Extract Y, U, V channels from packed YUYV.
135  * @param packed  is a <n x i32> vector with the packed YUYV blocks
136  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
137  */
138 static void
yuyv_to_yuv_soa(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i,LLVMValueRef * y,LLVMValueRef * u,LLVMValueRef * v)139 yuyv_to_yuv_soa(struct gallivm_state *gallivm,
140                 unsigned n,
141                 LLVMValueRef packed,
142                 LLVMValueRef i,
143                 LLVMValueRef *y,
144                 LLVMValueRef *u,
145                 LLVMValueRef *v)
146 {
147    LLVMBuilderRef builder = gallivm->builder;
148    struct lp_type type;
149    LLVMValueRef mask;
150 
151    memset(&type, 0, sizeof type);
152    type.width = 32;
153    type.length = n;
154 
155    assert(lp_check_value(type, packed));
156    assert(lp_check_value(type, i));
157 
158    /*
159    * Little endian:
160     * y = (yuyv >> 16*i) & 0xff
161     * u = (yuyv >> 8   ) & 0xff
162     * v = (yuyv >> 24  ) & 0xff
163     *
164     * Big endian:
165     * y = (yuyv >> (-16*i + 24) & 0xff
166     * u = (yuyv >> 16)          & 0xff
167     * v = (yuyv)                & 0xff
168     */
169 
170 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
171    /*
172     * Avoid shift with per-element count.
173     * No support on x86, gets translated to roughly 5 instructions
174     * per element. Didn't measure performance but cuts shader size
175     * by quite a bit (less difference if cpu has no sse4.1 support).
176     */
177    if (util_get_cpu_caps()->has_sse2 && n > 1) {
178       LLVMValueRef sel, tmp;
179       struct lp_build_context bld32;
180 
181       lp_build_context_init(&bld32, gallivm, type);
182 
183       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
184       sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
185        *y = lp_build_select(&bld32, sel, packed, tmp);
186    } else
187 #endif
188    {
189       LLVMValueRef shift;
190 #if UTIL_ARCH_LITTLE_ENDIAN
191       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
192 #else
193       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
194       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 24), "");
195 #endif
196       *y = LLVMBuildLShr(builder, packed, shift, "");
197    }
198 
199 #if UTIL_ARCH_LITTLE_ENDIAN
200    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
201    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
202 #else
203    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
204    *v = packed;
205 #endif
206 
207    mask = lp_build_const_int_vec(gallivm, type, 0xff);
208 
209    *y = LLVMBuildAnd(builder, *y, mask, "y");
210    *u = LLVMBuildAnd(builder, *u, mask, "u");
211    *v = LLVMBuildAnd(builder, *v, mask, "v");
212 }
213 
214 
215 static inline void
yuv_to_rgb_soa(struct gallivm_state * gallivm,unsigned n,LLVMValueRef y,LLVMValueRef u,LLVMValueRef v,LLVMValueRef * r,LLVMValueRef * g,LLVMValueRef * b)216 yuv_to_rgb_soa(struct gallivm_state *gallivm,
217                unsigned n,
218                LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
219                LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
220 {
221    LLVMBuilderRef builder = gallivm->builder;
222    struct lp_type type;
223    struct lp_build_context bld;
224 
225    LLVMValueRef c0;
226    LLVMValueRef c8;
227    LLVMValueRef c16;
228    LLVMValueRef c128;
229    LLVMValueRef c255;
230 
231    LLVMValueRef cy;
232    LLVMValueRef cug;
233    LLVMValueRef cub;
234    LLVMValueRef cvr;
235    LLVMValueRef cvg;
236 
237    memset(&type, 0, sizeof type);
238    type.sign = TRUE;
239    type.width = 32;
240    type.length = n;
241 
242    lp_build_context_init(&bld, gallivm, type);
243 
244    assert(lp_check_value(type, y));
245    assert(lp_check_value(type, u));
246    assert(lp_check_value(type, v));
247 
248    /*
249     * Constants
250     */
251 
252    c0   = lp_build_const_int_vec(gallivm, type,   0);
253    c8   = lp_build_const_int_vec(gallivm, type,   8);
254    c16  = lp_build_const_int_vec(gallivm, type,  16);
255    c128 = lp_build_const_int_vec(gallivm, type, 128);
256    c255 = lp_build_const_int_vec(gallivm, type, 255);
257 
258    cy  = lp_build_const_int_vec(gallivm, type,  298);
259    cug = lp_build_const_int_vec(gallivm, type, -100);
260    cub = lp_build_const_int_vec(gallivm, type,  516);
261    cvr = lp_build_const_int_vec(gallivm, type,  409);
262    cvg = lp_build_const_int_vec(gallivm, type, -208);
263 
264    /*
265     *  y -= 16;
266     *  u -= 128;
267     *  v -= 128;
268     */
269 
270    y = LLVMBuildSub(builder, y, c16, "");
271    u = LLVMBuildSub(builder, u, c128, "");
272    v = LLVMBuildSub(builder, v, c128, "");
273 
274    /*
275     * r = 298 * _y            + 409 * _v + 128;
276     * g = 298 * _y - 100 * _u - 208 * _v + 128;
277     * b = 298 * _y + 516 * _u            + 128;
278     */
279 
280    y = LLVMBuildMul(builder, y, cy, "");
281    y = LLVMBuildAdd(builder, y, c128, "");
282 
283    *r = LLVMBuildMul(builder, v, cvr, "");
284    *g = LLVMBuildAdd(builder,
285                      LLVMBuildMul(builder, u, cug, ""),
286                      LLVMBuildMul(builder, v, cvg, ""),
287                      "");
288    *b = LLVMBuildMul(builder, u, cub, "");
289 
290    *r = LLVMBuildAdd(builder, *r, y, "");
291    *g = LLVMBuildAdd(builder, *g, y, "");
292    *b = LLVMBuildAdd(builder, *b, y, "");
293 
294    /*
295     * r >>= 8;
296     * g >>= 8;
297     * b >>= 8;
298     */
299 
300    *r = LLVMBuildAShr(builder, *r, c8, "r");
301    *g = LLVMBuildAShr(builder, *g, c8, "g");
302    *b = LLVMBuildAShr(builder, *b, c8, "b");
303 
304    /*
305     * Clamp
306     */
307 
308    *r = lp_build_clamp(&bld, *r, c0, c255);
309    *g = lp_build_clamp(&bld, *g, c0, c255);
310    *b = lp_build_clamp(&bld, *b, c0, c255);
311 }
312 
313 
314 static LLVMValueRef
rgb_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef r,LLVMValueRef g,LLVMValueRef b)315 rgb_to_rgba_aos(struct gallivm_state *gallivm,
316                 unsigned n,
317                 LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
318 {
319    LLVMBuilderRef builder = gallivm->builder;
320    struct lp_type type;
321    LLVMValueRef a;
322    LLVMValueRef rgba;
323 
324    memset(&type, 0, sizeof type);
325    type.sign = TRUE;
326    type.width = 32;
327    type.length = n;
328 
329    assert(lp_check_value(type, r));
330    assert(lp_check_value(type, g));
331    assert(lp_check_value(type, b));
332 
333    /*
334     * Make a 4 x unorm8 vector
335     */
336 
337 #if UTIL_ARCH_LITTLE_ENDIAN
338    g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
339    b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
340    a = lp_build_const_int_vec(gallivm, type, 0xff000000);
341 #else
342    r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), "");
343    g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), "");
344    b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), "");
345    a = lp_build_const_int_vec(gallivm, type, 0x000000ff);
346 #endif
347 
348    rgba = r;
349    rgba = LLVMBuildOr(builder, rgba, g, "");
350    rgba = LLVMBuildOr(builder, rgba, b, "");
351    rgba = LLVMBuildOr(builder, rgba, a, "");
352 
353    rgba = LLVMBuildBitCast(builder, rgba,
354                            LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");
355 
356    return rgba;
357 }
358 
359 
360 /**
361  * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
362  */
363 static LLVMValueRef
uyvy_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)364 uyvy_to_rgba_aos(struct gallivm_state *gallivm,
365                  unsigned n,
366                  LLVMValueRef packed,
367                  LLVMValueRef i)
368 {
369    LLVMValueRef y, u, v;
370    LLVMValueRef r, g, b;
371    LLVMValueRef rgba;
372 
373    uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
374    yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
375    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
376 
377    return rgba;
378 }
379 
380 
381 /**
382  * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
383  */
384 static LLVMValueRef
yuyv_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)385 yuyv_to_rgba_aos(struct gallivm_state *gallivm,
386                  unsigned n,
387                  LLVMValueRef packed,
388                  LLVMValueRef i)
389 {
390    LLVMValueRef y, u, v;
391    LLVMValueRef r, g, b;
392    LLVMValueRef rgba;
393 
394    yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
395    yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
396    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
397 
398    return rgba;
399 }
400 
401 
402 /**
403  * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
404  */
405 static LLVMValueRef
rgbg_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)406 rgbg_to_rgba_aos(struct gallivm_state *gallivm,
407                  unsigned n,
408                  LLVMValueRef packed,
409                  LLVMValueRef i)
410 {
411    LLVMValueRef r, g, b;
412    LLVMValueRef rgba;
413 
414    uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
415    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
416 
417    return rgba;
418 }
419 
420 
421 /**
422  * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
423  */
424 static LLVMValueRef
grgb_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)425 grgb_to_rgba_aos(struct gallivm_state *gallivm,
426                  unsigned n,
427                  LLVMValueRef packed,
428                  LLVMValueRef i)
429 {
430    LLVMValueRef r, g, b;
431    LLVMValueRef rgba;
432 
433    yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
434    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
435 
436    return rgba;
437 }
438 
439 /**
440  * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS
441  */
442 static LLVMValueRef
grbr_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)443 grbr_to_rgba_aos(struct gallivm_state *gallivm,
444                  unsigned n,
445                  LLVMValueRef packed,
446                  LLVMValueRef i)
447 {
448    LLVMValueRef r, g, b;
449    LLVMValueRef rgba;
450 
451    uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
452    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
453 
454    return rgba;
455 }
456 
457 
458 /**
459  * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS
460  */
461 static LLVMValueRef
rgrb_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)462 rgrb_to_rgba_aos(struct gallivm_state *gallivm,
463                  unsigned n,
464                  LLVMValueRef packed,
465                  LLVMValueRef i)
466 {
467    LLVMValueRef r, g, b;
468    LLVMValueRef rgba;
469 
470    yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
471    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
472 
473    return rgba;
474 }
475 
476 /**
477  * @param n  is the number of pixels processed
478  * @param packed  is a <n x i32> vector with the packed YUYV blocks
479  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
480  * @return  a <4*n x i8> vector with the pixel RGBA values in AoS
481  */
482 LLVMValueRef
lp_build_fetch_subsampled_rgba_aos(struct gallivm_state * gallivm,const struct util_format_description * format_desc,unsigned n,LLVMValueRef base_ptr,LLVMValueRef offset,LLVMValueRef i,LLVMValueRef j)483 lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
484                                    const struct util_format_description *format_desc,
485                                    unsigned n,
486                                    LLVMValueRef base_ptr,
487                                    LLVMValueRef offset,
488                                    LLVMValueRef i,
489                                    LLVMValueRef j)
490 {
491    LLVMValueRef packed;
492    LLVMValueRef rgba;
493    struct lp_type fetch_type;
494 
495    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
496    assert(format_desc->block.bits == 32);
497    assert(format_desc->block.width == 2);
498    assert(format_desc->block.height == 1);
499 
500    fetch_type = lp_type_uint(32);
501    packed = lp_build_gather(gallivm, n, 32, fetch_type, TRUE, base_ptr, offset, FALSE);
502 
503    (void)j;
504 
505    switch (format_desc->format) {
506    case PIPE_FORMAT_UYVY:
507       rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
508       break;
509    case PIPE_FORMAT_YUYV:
510       rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
511       break;
512    case PIPE_FORMAT_R8G8_B8G8_UNORM:
513       rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
514       break;
515    case PIPE_FORMAT_G8R8_G8B8_UNORM:
516       rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
517       break;
518    case PIPE_FORMAT_G8R8_B8R8_UNORM:
519       rgba = grbr_to_rgba_aos(gallivm, n, packed, i);
520       break;
521    case PIPE_FORMAT_R8G8_R8B8_UNORM:
522       rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);
523       break;
524    default:
525       assert(0);
526       rgba =  LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
527       break;
528    }
529 
530    return rgba;
531 }
532 
533