1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * YUV pixel format manipulation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 */
35
36
37 #include "util/format/u_format.h"
38 #include "util/u_cpu_detect.h"
39
40 #include "lp_bld_arit.h"
41 #include "lp_bld_type.h"
42 #include "lp_bld_const.h"
43 #include "lp_bld_conv.h"
44 #include "lp_bld_gather.h"
45 #include "lp_bld_format.h"
46 #include "lp_bld_init.h"
47 #include "lp_bld_logic.h"
48
49 /**
50 * Extract Y, U, V channels from packed UYVY.
51 * @param packed is a <n x i32> vector with the packed UYVY blocks
52 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
53 */
54 static void
uyvy_to_yuv_soa(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i,LLVMValueRef * y,LLVMValueRef * u,LLVMValueRef * v)55 uyvy_to_yuv_soa(struct gallivm_state *gallivm,
56 unsigned n,
57 LLVMValueRef packed,
58 LLVMValueRef i,
59 LLVMValueRef *y,
60 LLVMValueRef *u,
61 LLVMValueRef *v)
62 {
63 LLVMBuilderRef builder = gallivm->builder;
64 struct lp_type type;
65 LLVMValueRef mask;
66
67 memset(&type, 0, sizeof type);
68 type.width = 32;
69 type.length = n;
70
71 assert(lp_check_value(type, packed));
72 assert(lp_check_value(type, i));
73
74 /*
75 * Little endian:
76 * y = (uyvy >> (16*i + 8)) & 0xff
77 * u = (uyvy ) & 0xff
78 * v = (uyvy >> 16 ) & 0xff
79 *
80 * Big endian:
81 * y = (uyvy >> (-16*i + 16)) & 0xff
82 * u = (uyvy >> 24) & 0xff
83 * v = (uyvy >> 8) & 0xff
84 */
85
86 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
87 /*
88 * Avoid shift with per-element count.
89 * No support on x86, gets translated to roughly 5 instructions
90 * per element. Didn't measure performance but cuts shader size
91 * by quite a bit (less difference if cpu has no sse4.1 support).
92 */
93 if (util_get_cpu_caps()->has_sse2 && n > 1) {
94 LLVMValueRef sel, tmp, tmp2;
95 struct lp_build_context bld32;
96
97 lp_build_context_init(&bld32, gallivm, type);
98
99 tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
100 tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
101 sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
102 *y = lp_build_select(&bld32, sel, tmp, tmp2);
103 } else
104 #endif
105 {
106 LLVMValueRef shift;
107 #if UTIL_ARCH_LITTLE_ENDIAN
108 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
109 shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
110 #else
111 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
112 shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 16), "");
113 #endif
114 *y = LLVMBuildLShr(builder, packed, shift, "");
115 }
116
117 #if UTIL_ARCH_LITTLE_ENDIAN
118 *u = packed;
119 *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
120 #else
121 *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
122 *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
123 #endif
124
125 mask = lp_build_const_int_vec(gallivm, type, 0xff);
126
127 *y = LLVMBuildAnd(builder, *y, mask, "y");
128 *u = LLVMBuildAnd(builder, *u, mask, "u");
129 *v = LLVMBuildAnd(builder, *v, mask, "v");
130 }
131
132
133 /**
134 * Extract Y, U, V channels from packed YUYV.
135 * @param packed is a <n x i32> vector with the packed YUYV blocks
136 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
137 */
138 static void
yuyv_to_yuv_soa(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i,LLVMValueRef * y,LLVMValueRef * u,LLVMValueRef * v)139 yuyv_to_yuv_soa(struct gallivm_state *gallivm,
140 unsigned n,
141 LLVMValueRef packed,
142 LLVMValueRef i,
143 LLVMValueRef *y,
144 LLVMValueRef *u,
145 LLVMValueRef *v)
146 {
147 LLVMBuilderRef builder = gallivm->builder;
148 struct lp_type type;
149 LLVMValueRef mask;
150
151 memset(&type, 0, sizeof type);
152 type.width = 32;
153 type.length = n;
154
155 assert(lp_check_value(type, packed));
156 assert(lp_check_value(type, i));
157
158 /*
159 * Little endian:
160 * y = (yuyv >> 16*i) & 0xff
161 * u = (yuyv >> 8 ) & 0xff
162 * v = (yuyv >> 24 ) & 0xff
163 *
164 * Big endian:
165 * y = (yuyv >> (-16*i + 24) & 0xff
166 * u = (yuyv >> 16) & 0xff
167 * v = (yuyv) & 0xff
168 */
169
170 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
171 /*
172 * Avoid shift with per-element count.
173 * No support on x86, gets translated to roughly 5 instructions
174 * per element. Didn't measure performance but cuts shader size
175 * by quite a bit (less difference if cpu has no sse4.1 support).
176 */
177 if (util_get_cpu_caps()->has_sse2 && n > 1) {
178 LLVMValueRef sel, tmp;
179 struct lp_build_context bld32;
180
181 lp_build_context_init(&bld32, gallivm, type);
182
183 tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
184 sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
185 *y = lp_build_select(&bld32, sel, packed, tmp);
186 } else
187 #endif
188 {
189 LLVMValueRef shift;
190 #if UTIL_ARCH_LITTLE_ENDIAN
191 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
192 #else
193 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
194 shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 24), "");
195 #endif
196 *y = LLVMBuildLShr(builder, packed, shift, "");
197 }
198
199 #if UTIL_ARCH_LITTLE_ENDIAN
200 *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
201 *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
202 #else
203 *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
204 *v = packed;
205 #endif
206
207 mask = lp_build_const_int_vec(gallivm, type, 0xff);
208
209 *y = LLVMBuildAnd(builder, *y, mask, "y");
210 *u = LLVMBuildAnd(builder, *u, mask, "u");
211 *v = LLVMBuildAnd(builder, *v, mask, "v");
212 }
213
214
215 static inline void
yuv_to_rgb_soa(struct gallivm_state * gallivm,unsigned n,LLVMValueRef y,LLVMValueRef u,LLVMValueRef v,LLVMValueRef * r,LLVMValueRef * g,LLVMValueRef * b)216 yuv_to_rgb_soa(struct gallivm_state *gallivm,
217 unsigned n,
218 LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
219 LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
220 {
221 LLVMBuilderRef builder = gallivm->builder;
222 struct lp_type type;
223 struct lp_build_context bld;
224
225 LLVMValueRef c0;
226 LLVMValueRef c8;
227 LLVMValueRef c16;
228 LLVMValueRef c128;
229 LLVMValueRef c255;
230
231 LLVMValueRef cy;
232 LLVMValueRef cug;
233 LLVMValueRef cub;
234 LLVMValueRef cvr;
235 LLVMValueRef cvg;
236
237 memset(&type, 0, sizeof type);
238 type.sign = TRUE;
239 type.width = 32;
240 type.length = n;
241
242 lp_build_context_init(&bld, gallivm, type);
243
244 assert(lp_check_value(type, y));
245 assert(lp_check_value(type, u));
246 assert(lp_check_value(type, v));
247
248 /*
249 * Constants
250 */
251
252 c0 = lp_build_const_int_vec(gallivm, type, 0);
253 c8 = lp_build_const_int_vec(gallivm, type, 8);
254 c16 = lp_build_const_int_vec(gallivm, type, 16);
255 c128 = lp_build_const_int_vec(gallivm, type, 128);
256 c255 = lp_build_const_int_vec(gallivm, type, 255);
257
258 cy = lp_build_const_int_vec(gallivm, type, 298);
259 cug = lp_build_const_int_vec(gallivm, type, -100);
260 cub = lp_build_const_int_vec(gallivm, type, 516);
261 cvr = lp_build_const_int_vec(gallivm, type, 409);
262 cvg = lp_build_const_int_vec(gallivm, type, -208);
263
264 /*
265 * y -= 16;
266 * u -= 128;
267 * v -= 128;
268 */
269
270 y = LLVMBuildSub(builder, y, c16, "");
271 u = LLVMBuildSub(builder, u, c128, "");
272 v = LLVMBuildSub(builder, v, c128, "");
273
274 /*
275 * r = 298 * _y + 409 * _v + 128;
276 * g = 298 * _y - 100 * _u - 208 * _v + 128;
277 * b = 298 * _y + 516 * _u + 128;
278 */
279
280 y = LLVMBuildMul(builder, y, cy, "");
281 y = LLVMBuildAdd(builder, y, c128, "");
282
283 *r = LLVMBuildMul(builder, v, cvr, "");
284 *g = LLVMBuildAdd(builder,
285 LLVMBuildMul(builder, u, cug, ""),
286 LLVMBuildMul(builder, v, cvg, ""),
287 "");
288 *b = LLVMBuildMul(builder, u, cub, "");
289
290 *r = LLVMBuildAdd(builder, *r, y, "");
291 *g = LLVMBuildAdd(builder, *g, y, "");
292 *b = LLVMBuildAdd(builder, *b, y, "");
293
294 /*
295 * r >>= 8;
296 * g >>= 8;
297 * b >>= 8;
298 */
299
300 *r = LLVMBuildAShr(builder, *r, c8, "r");
301 *g = LLVMBuildAShr(builder, *g, c8, "g");
302 *b = LLVMBuildAShr(builder, *b, c8, "b");
303
304 /*
305 * Clamp
306 */
307
308 *r = lp_build_clamp(&bld, *r, c0, c255);
309 *g = lp_build_clamp(&bld, *g, c0, c255);
310 *b = lp_build_clamp(&bld, *b, c0, c255);
311 }
312
313
314 static LLVMValueRef
rgb_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef r,LLVMValueRef g,LLVMValueRef b)315 rgb_to_rgba_aos(struct gallivm_state *gallivm,
316 unsigned n,
317 LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
318 {
319 LLVMBuilderRef builder = gallivm->builder;
320 struct lp_type type;
321 LLVMValueRef a;
322 LLVMValueRef rgba;
323
324 memset(&type, 0, sizeof type);
325 type.sign = TRUE;
326 type.width = 32;
327 type.length = n;
328
329 assert(lp_check_value(type, r));
330 assert(lp_check_value(type, g));
331 assert(lp_check_value(type, b));
332
333 /*
334 * Make a 4 x unorm8 vector
335 */
336
337 #if UTIL_ARCH_LITTLE_ENDIAN
338 g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
339 b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
340 a = lp_build_const_int_vec(gallivm, type, 0xff000000);
341 #else
342 r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), "");
343 g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), "");
344 b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), "");
345 a = lp_build_const_int_vec(gallivm, type, 0x000000ff);
346 #endif
347
348 rgba = r;
349 rgba = LLVMBuildOr(builder, rgba, g, "");
350 rgba = LLVMBuildOr(builder, rgba, b, "");
351 rgba = LLVMBuildOr(builder, rgba, a, "");
352
353 rgba = LLVMBuildBitCast(builder, rgba,
354 LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");
355
356 return rgba;
357 }
358
359
360 /**
361 * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
362 */
363 static LLVMValueRef
uyvy_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)364 uyvy_to_rgba_aos(struct gallivm_state *gallivm,
365 unsigned n,
366 LLVMValueRef packed,
367 LLVMValueRef i)
368 {
369 LLVMValueRef y, u, v;
370 LLVMValueRef r, g, b;
371 LLVMValueRef rgba;
372
373 uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
374 yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
375 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
376
377 return rgba;
378 }
379
380
381 /**
382 * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
383 */
384 static LLVMValueRef
yuyv_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)385 yuyv_to_rgba_aos(struct gallivm_state *gallivm,
386 unsigned n,
387 LLVMValueRef packed,
388 LLVMValueRef i)
389 {
390 LLVMValueRef y, u, v;
391 LLVMValueRef r, g, b;
392 LLVMValueRef rgba;
393
394 yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
395 yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
396 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
397
398 return rgba;
399 }
400
401
402 /**
403 * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
404 */
405 static LLVMValueRef
rgbg_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)406 rgbg_to_rgba_aos(struct gallivm_state *gallivm,
407 unsigned n,
408 LLVMValueRef packed,
409 LLVMValueRef i)
410 {
411 LLVMValueRef r, g, b;
412 LLVMValueRef rgba;
413
414 uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
415 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
416
417 return rgba;
418 }
419
420
421 /**
422 * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
423 */
424 static LLVMValueRef
grgb_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)425 grgb_to_rgba_aos(struct gallivm_state *gallivm,
426 unsigned n,
427 LLVMValueRef packed,
428 LLVMValueRef i)
429 {
430 LLVMValueRef r, g, b;
431 LLVMValueRef rgba;
432
433 yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
434 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
435
436 return rgba;
437 }
438
439 /**
440 * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS
441 */
442 static LLVMValueRef
grbr_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)443 grbr_to_rgba_aos(struct gallivm_state *gallivm,
444 unsigned n,
445 LLVMValueRef packed,
446 LLVMValueRef i)
447 {
448 LLVMValueRef r, g, b;
449 LLVMValueRef rgba;
450
451 uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
452 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
453
454 return rgba;
455 }
456
457
458 /**
459 * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS
460 */
461 static LLVMValueRef
rgrb_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)462 rgrb_to_rgba_aos(struct gallivm_state *gallivm,
463 unsigned n,
464 LLVMValueRef packed,
465 LLVMValueRef i)
466 {
467 LLVMValueRef r, g, b;
468 LLVMValueRef rgba;
469
470 yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
471 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
472
473 return rgba;
474 }
475
476 /**
477 * @param n is the number of pixels processed
478 * @param packed is a <n x i32> vector with the packed YUYV blocks
479 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
480 * @return a <4*n x i8> vector with the pixel RGBA values in AoS
481 */
482 LLVMValueRef
lp_build_fetch_subsampled_rgba_aos(struct gallivm_state * gallivm,const struct util_format_description * format_desc,unsigned n,LLVMValueRef base_ptr,LLVMValueRef offset,LLVMValueRef i,LLVMValueRef j)483 lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
484 const struct util_format_description *format_desc,
485 unsigned n,
486 LLVMValueRef base_ptr,
487 LLVMValueRef offset,
488 LLVMValueRef i,
489 LLVMValueRef j)
490 {
491 LLVMValueRef packed;
492 LLVMValueRef rgba;
493 struct lp_type fetch_type;
494
495 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
496 assert(format_desc->block.bits == 32);
497 assert(format_desc->block.width == 2);
498 assert(format_desc->block.height == 1);
499
500 fetch_type = lp_type_uint(32);
501 packed = lp_build_gather(gallivm, n, 32, fetch_type, TRUE, base_ptr, offset, FALSE);
502
503 (void)j;
504
505 switch (format_desc->format) {
506 case PIPE_FORMAT_UYVY:
507 rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
508 break;
509 case PIPE_FORMAT_YUYV:
510 rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
511 break;
512 case PIPE_FORMAT_R8G8_B8G8_UNORM:
513 rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
514 break;
515 case PIPE_FORMAT_G8R8_G8B8_UNORM:
516 rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
517 break;
518 case PIPE_FORMAT_G8R8_B8R8_UNORM:
519 rgba = grbr_to_rgba_aos(gallivm, n, packed, i);
520 break;
521 case PIPE_FORMAT_R8G8_R8B8_UNORM:
522 rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);
523 break;
524 default:
525 assert(0);
526 rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
527 break;
528 }
529
530 return rgba;
531 }
532
533