1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Helper functions for swizzling/shuffling.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35 #include <inttypes.h> /* for PRIx64 macro */
36 #include "util/compiler.h"
37 #include "util/u_debug.h"
38
39 #include "lp_bld_type.h"
40 #include "lp_bld_const.h"
41 #include "lp_bld_init.h"
42 #include "lp_bld_logic.h"
43 #include "lp_bld_swizzle.h"
44 #include "lp_bld_pack.h"
45
46
47 LLVMValueRef
lp_build_broadcast(struct gallivm_state * gallivm,LLVMTypeRef vec_type,LLVMValueRef scalar)48 lp_build_broadcast(struct gallivm_state *gallivm,
49 LLVMTypeRef vec_type,
50 LLVMValueRef scalar)
51 {
52 LLVMValueRef res;
53
54 if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
55 /* scalar */
56 assert(vec_type == LLVMTypeOf(scalar));
57 res = scalar;
58 } else {
59 LLVMBuilderRef builder = gallivm->builder;
60 const unsigned length = LLVMGetVectorSize(vec_type);
61 LLVMValueRef undef = LLVMGetUndef(vec_type);
62 /* The shuffle vector is always made of int32 elements */
63 LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
64 LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
65
66 assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));
67
68 res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
69 res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
70 }
71
72 return res;
73 }
74
75
76 /**
77 * Broadcast
78 */
79 LLVMValueRef
lp_build_broadcast_scalar(struct lp_build_context * bld,LLVMValueRef scalar)80 lp_build_broadcast_scalar(struct lp_build_context *bld,
81 LLVMValueRef scalar)
82 {
83 assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));
84
85 return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
86 }
87
88
89 /**
90 * Combined extract and broadcast (mere shuffle in most cases)
91 */
92 LLVMValueRef
lp_build_extract_broadcast(struct gallivm_state * gallivm,struct lp_type src_type,struct lp_type dst_type,LLVMValueRef vector,LLVMValueRef index)93 lp_build_extract_broadcast(struct gallivm_state *gallivm,
94 struct lp_type src_type,
95 struct lp_type dst_type,
96 LLVMValueRef vector,
97 LLVMValueRef index)
98 {
99 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
100 LLVMValueRef res;
101
102 assert(src_type.floating == dst_type.floating);
103 assert(src_type.width == dst_type.width);
104
105 assert(lp_check_value(src_type, vector));
106 assert(LLVMTypeOf(index) == i32t);
107
108 if (src_type.length == 1) {
109 if (dst_type.length == 1) {
110 /*
111 * Trivial scalar -> scalar.
112 */
113
114 res = vector;
115 }
116 else {
117 /*
118 * Broadcast scalar -> vector.
119 */
120
121 res = lp_build_broadcast(gallivm,
122 lp_build_vec_type(gallivm, dst_type),
123 vector);
124 }
125 }
126 else {
127 if (dst_type.length > 1) {
128 /*
129 * shuffle - result can be of different length.
130 */
131
132 LLVMValueRef shuffle;
133 shuffle = lp_build_broadcast(gallivm,
134 LLVMVectorType(i32t, dst_type.length),
135 index);
136 res = LLVMBuildShuffleVector(gallivm->builder, vector,
137 LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
138 shuffle, "");
139 }
140 else {
141 /*
142 * Trivial extract scalar from vector.
143 */
144 res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
145 }
146 }
147
148 return res;
149 }
150
151
152 /**
153 * Swizzle one channel into other channels.
154 */
155 LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context * bld,LLVMValueRef a,unsigned channel,unsigned num_channels)156 lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
157 LLVMValueRef a,
158 unsigned channel,
159 unsigned num_channels)
160 {
161 LLVMBuilderRef builder = bld->gallivm->builder;
162 const struct lp_type type = bld->type;
163 const unsigned n = type.length;
164 unsigned i, j;
165
166 if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
167 return a;
168
169 assert(num_channels == 2 || num_channels == 4);
170
171 /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
172 * using shuffles here actually causes worst results. More investigation is
173 * needed. */
174 if (LLVMIsConstant(a) ||
175 type.width >= 16) {
176 /*
177 * Shuffle.
178 */
179 LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
180 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
181
182 for(j = 0; j < n; j += num_channels)
183 for(i = 0; i < num_channels; ++i)
184 shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
185
186 return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
187 }
188 else if (num_channels == 2) {
189 /*
190 * Bit mask and shifts
191 *
192 * XY XY .... XY <= input
193 * 0Y 0Y .... 0Y
194 * YY YY .... YY
195 * YY YY .... YY <= output
196 */
197 struct lp_type type2;
198 LLVMValueRef tmp = NULL;
199 int shift;
200
201 a = LLVMBuildAnd(builder, a,
202 lp_build_const_mask_aos(bld->gallivm,
203 type, 1 << channel, num_channels), "");
204
205 type2 = type;
206 type2.floating = FALSE;
207 type2.width *= 2;
208 type2.length /= 2;
209
210 a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");
211
212 /*
213 * Vector element 0 is always channel X.
214 *
215 * 76 54 32 10 (array numbering)
216 * Little endian reg in: YX YX YX YX
217 * Little endian reg out: YY YY YY YY if shift right (shift == -1)
218 * XX XX XX XX if shift left (shift == 1)
219 *
220 * 01 23 45 67 (array numbering)
221 * Big endian reg in: XY XY XY XY
222 * Big endian reg out: YY YY YY YY if shift left (shift == 1)
223 * XX XX XX XX if shift right (shift == -1)
224 *
225 */
226 #if UTIL_ARCH_LITTLE_ENDIAN
227 shift = channel == 0 ? 1 : -1;
228 #else
229 shift = channel == 0 ? -1 : 1;
230 #endif
231
232 if (shift > 0) {
233 tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
234 } else if (shift < 0) {
235 tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
236 }
237
238 assert(tmp);
239 if (tmp) {
240 a = LLVMBuildOr(builder, a, tmp, "");
241 }
242
243 return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
244 }
245 else {
246 /*
247 * Bit mask and recursive shifts
248 *
249 * Little-endian registers:
250 *
251 * 7654 3210
252 * WZYX WZYX .... WZYX <= input
253 * 00Y0 00Y0 .... 00Y0 <= mask
254 * 00YY 00YY .... 00YY <= shift right 1 (shift amount -1)
255 * YYYY YYYY .... YYYY <= shift left 2 (shift amount 2)
256 *
257 * Big-endian registers:
258 *
259 * 0123 4567
260 * XYZW XYZW .... XYZW <= input
261 * 0Y00 0Y00 .... 0Y00 <= mask
262 * YY00 YY00 .... YY00 <= shift left 1 (shift amount 1)
263 * YYYY YYYY .... YYYY <= shift right 2 (shift amount -2)
264 *
265 * shifts[] gives little-endian shift amounts; we need to negate for big-endian.
266 */
267 struct lp_type type4;
268 const int shifts[4][2] = {
269 { 1, 2},
270 {-1, 2},
271 { 1, -2},
272 {-1, -2}
273 };
274 unsigned i;
275
276 a = LLVMBuildAnd(builder, a,
277 lp_build_const_mask_aos(bld->gallivm,
278 type, 1 << channel, 4), "");
279
280 /*
281 * Build a type where each element is an integer that cover the four
282 * channels.
283 */
284
285 type4 = type;
286 type4.floating = FALSE;
287 type4.width *= 4;
288 type4.length /= 4;
289
290 a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
291
292 for(i = 0; i < 2; ++i) {
293 LLVMValueRef tmp = NULL;
294 int shift = shifts[channel][i];
295
296 /* See endianness diagram above */
297 #if UTIL_ARCH_BIG_ENDIAN
298 shift = -shift;
299 #endif
300
301 if(shift > 0)
302 tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
303 if(shift < 0)
304 tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
305
306 assert(tmp);
307 if(tmp)
308 a = LLVMBuildOr(builder, a, tmp, "");
309 }
310
311 return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
312 }
313 }
314
315
316 /**
317 * Swizzle a vector consisting of an array of XYZW structs.
318 *
319 * This fills a vector of dst_len length with the swizzled channels from src.
320 *
321 * e.g. with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in
322 * RGBA RGBA = BGR BGR BG
323 *
324 * @param swizzles the swizzle array
325 * @param num_swizzles the number of elements in swizzles
326 * @param dst_len the length of the result
327 */
328 LLVMValueRef
lp_build_swizzle_aos_n(struct gallivm_state * gallivm,LLVMValueRef src,const unsigned char * swizzles,unsigned num_swizzles,unsigned dst_len)329 lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
330 LLVMValueRef src,
331 const unsigned char* swizzles,
332 unsigned num_swizzles,
333 unsigned dst_len)
334 {
335 LLVMBuilderRef builder = gallivm->builder;
336 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
337 unsigned i;
338
339 assert(dst_len < LP_MAX_VECTOR_WIDTH);
340
341 for (i = 0; i < dst_len; ++i) {
342 int swizzle = swizzles[i % num_swizzles];
343
344 if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
345 shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
346 } else {
347 shuffles[i] = lp_build_const_int32(gallivm, swizzle);
348 }
349 }
350
351 return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), LLVMConstVector(shuffles, dst_len), "");
352 }
353
354
355 LLVMValueRef
lp_build_swizzle_aos(struct lp_build_context * bld,LLVMValueRef a,const unsigned char swizzles[4])356 lp_build_swizzle_aos(struct lp_build_context *bld,
357 LLVMValueRef a,
358 const unsigned char swizzles[4])
359 {
360 LLVMBuilderRef builder = bld->gallivm->builder;
361 const struct lp_type type = bld->type;
362 const unsigned n = type.length;
363 unsigned i, j;
364
365 if (swizzles[0] == PIPE_SWIZZLE_X &&
366 swizzles[1] == PIPE_SWIZZLE_Y &&
367 swizzles[2] == PIPE_SWIZZLE_Z &&
368 swizzles[3] == PIPE_SWIZZLE_W) {
369 return a;
370 }
371
372 if (swizzles[0] == swizzles[1] &&
373 swizzles[1] == swizzles[2] &&
374 swizzles[2] == swizzles[3]) {
375 switch (swizzles[0]) {
376 case PIPE_SWIZZLE_X:
377 case PIPE_SWIZZLE_Y:
378 case PIPE_SWIZZLE_Z:
379 case PIPE_SWIZZLE_W:
380 return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
381 case PIPE_SWIZZLE_0:
382 return bld->zero;
383 case PIPE_SWIZZLE_1:
384 return bld->one;
385 case LP_BLD_SWIZZLE_DONTCARE:
386 return bld->undef;
387 default:
388 assert(0);
389 return bld->undef;
390 }
391 }
392
393 if (LLVMIsConstant(a) ||
394 type.width >= 16) {
395 /*
396 * Shuffle.
397 */
398 LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
399 LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
400 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
401 LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
402
403 memset(aux, 0, sizeof aux);
404
405 for(j = 0; j < n; j += 4) {
406 for(i = 0; i < 4; ++i) {
407 unsigned shuffle;
408 switch (swizzles[i]) {
409 default:
410 assert(0);
411 #if defined(NDEBUG) || defined(DEBUG)
412 FALLTHROUGH;
413 #endif
414 case PIPE_SWIZZLE_X:
415 case PIPE_SWIZZLE_Y:
416 case PIPE_SWIZZLE_Z:
417 case PIPE_SWIZZLE_W:
418 shuffle = j + swizzles[i];
419 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
420 break;
421 case PIPE_SWIZZLE_0:
422 shuffle = type.length + 0;
423 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
424 if (!aux[0]) {
425 aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
426 }
427 break;
428 case PIPE_SWIZZLE_1:
429 shuffle = type.length + 1;
430 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
431 if (!aux[1]) {
432 aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
433 }
434 break;
435 case LP_BLD_SWIZZLE_DONTCARE:
436 shuffles[j + i] = LLVMGetUndef(i32t);
437 break;
438 }
439 }
440 }
441
442 for (i = 0; i < n; ++i) {
443 if (!aux[i]) {
444 aux[i] = undef;
445 }
446 }
447
448 return LLVMBuildShuffleVector(builder, a,
449 LLVMConstVector(aux, n),
450 LLVMConstVector(shuffles, n), "");
451 } else {
452 /*
453 * Bit mask and shifts.
454 *
455 * For example, this will convert BGRA to RGBA by doing
456 *
457 * Little endian:
458 * rgba = (bgra & 0x00ff0000) >> 16
459 * | (bgra & 0xff00ff00)
460 * | (bgra & 0x000000ff) << 16
461 *
462 * Big endian:A
463 * rgba = (bgra & 0x0000ff00) << 16
464 * | (bgra & 0x00ff00ff)
465 * | (bgra & 0xff000000) >> 16
466 *
467 * This is necessary not only for faster cause, but because X86 backend
468 * will refuse shuffles of <4 x i8> vectors
469 */
470 LLVMValueRef res;
471 struct lp_type type4;
472 unsigned cond = 0;
473 int chan;
474 int shift;
475
476 /*
477 * Start with a mixture of 1 and 0.
478 */
479 for (chan = 0; chan < 4; ++chan) {
480 if (swizzles[chan] == PIPE_SWIZZLE_1) {
481 cond |= 1 << chan;
482 }
483 }
484 res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);
485
486 /*
487 * Build a type where each element is an integer that cover the four
488 * channels.
489 */
490 type4 = type;
491 type4.floating = FALSE;
492 type4.width *= 4;
493 type4.length /= 4;
494
495 a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
496 res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");
497
498 /*
499 * Mask and shift the channels, trying to group as many channels in the
500 * same shift as possible. The shift amount is positive for shifts left
501 * and negative for shifts right.
502 */
503 for (shift = -3; shift <= 3; ++shift) {
504 uint64_t mask = 0;
505
506 assert(type4.width <= sizeof(mask)*8);
507
508 /*
509 * Vector element numbers follow the XYZW order, so 0 is always X, etc.
510 * After widening 4 times we have:
511 *
512 * 3210
513 * Little-endian register layout: WZYX
514 *
515 * 0123
516 * Big-endian register layout: XYZW
517 *
518 * For little-endian, higher-numbered channels are obtained by a shift right
519 * (negative shift amount) and lower-numbered channels by a shift left
520 * (positive shift amount). The opposite is true for big-endian.
521 */
522 for (chan = 0; chan < 4; ++chan) {
523 if (swizzles[chan] < 4) {
524 /* We need to move channel swizzles[chan] into channel chan */
525 #if UTIL_ARCH_LITTLE_ENDIAN
526 if (swizzles[chan] - chan == -shift) {
527 mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
528 }
529 #else
530 if (swizzles[chan] - chan == shift) {
531 mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width);
532 }
533 #endif
534 }
535 }
536
537 if (mask) {
538 LLVMValueRef masked;
539 LLVMValueRef shifted;
540 if (0)
541 debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask);
542
543 masked = LLVMBuildAnd(builder, a,
544 lp_build_const_int_vec(bld->gallivm, type4, mask), "");
545 if (shift > 0) {
546 shifted = LLVMBuildShl(builder, masked,
547 lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
548 } else if (shift < 0) {
549 shifted = LLVMBuildLShr(builder, masked,
550 lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
551 } else {
552 shifted = masked;
553 }
554
555 res = LLVMBuildOr(builder, res, shifted, "");
556 }
557 }
558
559 return LLVMBuildBitCast(builder, res,
560 lp_build_vec_type(bld->gallivm, type), "");
561 }
562 }
563
564
565 /**
566 * Extended swizzle of a single channel of a SoA vector.
567 *
568 * @param bld building context
569 * @param unswizzled array with the 4 unswizzled values
570 * @param swizzle one of the PIPE_SWIZZLE_*
571 *
572 * @return the swizzled value.
573 */
574 LLVMValueRef
lp_build_swizzle_soa_channel(struct lp_build_context * bld,const LLVMValueRef * unswizzled,unsigned swizzle)575 lp_build_swizzle_soa_channel(struct lp_build_context *bld,
576 const LLVMValueRef *unswizzled,
577 unsigned swizzle)
578 {
579 switch (swizzle) {
580 case PIPE_SWIZZLE_X:
581 case PIPE_SWIZZLE_Y:
582 case PIPE_SWIZZLE_Z:
583 case PIPE_SWIZZLE_W:
584 return unswizzled[swizzle];
585 case PIPE_SWIZZLE_0:
586 return bld->zero;
587 case PIPE_SWIZZLE_1:
588 return bld->one;
589 default:
590 assert(0);
591 return bld->undef;
592 }
593 }
594
595
596 /**
597 * Extended swizzle of a SoA vector.
598 *
599 * @param bld building context
600 * @param unswizzled array with the 4 unswizzled values
601 * @param swizzles array of PIPE_SWIZZLE_*
602 * @param swizzled output swizzled values
603 */
604 void
lp_build_swizzle_soa(struct lp_build_context * bld,const LLVMValueRef * unswizzled,const unsigned char swizzles[4],LLVMValueRef * swizzled)605 lp_build_swizzle_soa(struct lp_build_context *bld,
606 const LLVMValueRef *unswizzled,
607 const unsigned char swizzles[4],
608 LLVMValueRef *swizzled)
609 {
610 unsigned chan;
611
612 for (chan = 0; chan < 4; ++chan) {
613 swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled,
614 swizzles[chan]);
615 }
616 }
617
618
619 /**
620 * Do an extended swizzle of a SoA vector inplace.
621 *
622 * @param bld building context
623 * @param values intput/output array with the 4 values
624 * @param swizzles array of PIPE_SWIZZLE_*
625 */
626 void
lp_build_swizzle_soa_inplace(struct lp_build_context * bld,LLVMValueRef * values,const unsigned char swizzles[4])627 lp_build_swizzle_soa_inplace(struct lp_build_context *bld,
628 LLVMValueRef *values,
629 const unsigned char swizzles[4])
630 {
631 LLVMValueRef unswizzled[4];
632 unsigned chan;
633
634 for (chan = 0; chan < 4; ++chan) {
635 unswizzled[chan] = values[chan];
636 }
637
638 lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
639 }
640
641
642 /**
643 * Transpose from AOS <-> SOA
644 *
645 * @param single_type_lp type of pixels
646 * @param src the 4 * n pixel input
647 * @param dst the 4 * n pixel output
648 */
649 void
lp_build_transpose_aos(struct gallivm_state * gallivm,struct lp_type single_type_lp,const LLVMValueRef src[4],LLVMValueRef dst[4])650 lp_build_transpose_aos(struct gallivm_state *gallivm,
651 struct lp_type single_type_lp,
652 const LLVMValueRef src[4],
653 LLVMValueRef dst[4])
654 {
655 struct lp_type double_type_lp = single_type_lp;
656 LLVMTypeRef single_type;
657 LLVMTypeRef double_type;
658 LLVMValueRef t0 = NULL, t1 = NULL, t2 = NULL, t3 = NULL;
659
660 double_type_lp.length >>= 1;
661 double_type_lp.width <<= 1;
662
663 double_type = lp_build_vec_type(gallivm, double_type_lp);
664 single_type = lp_build_vec_type(gallivm, single_type_lp);
665
666 LLVMValueRef double_type_zero = LLVMConstNull(double_type);
667 /* Interleave x, y, z, w -> xy and zw */
668 if (src[0] || src[1]) {
669 LLVMValueRef src0 = src[0];
670 LLVMValueRef src1 = src[1];
671 if (!src0)
672 src0 = LLVMConstNull(single_type);
673 if (!src1)
674 src1 = LLVMConstNull(single_type);
675 t0 = lp_build_interleave2_half(gallivm, single_type_lp, src0, src1, 0);
676 t2 = lp_build_interleave2_half(gallivm, single_type_lp, src0, src1, 1);
677
678 /* Cast to double width type for second interleave */
679 t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0");
680 t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2");
681 }
682 if (src[2] || src[3]) {
683 LLVMValueRef src2 = src[2];
684 LLVMValueRef src3 = src[3];
685 if (!src2)
686 src2 = LLVMConstNull(single_type);
687 if (!src3)
688 src3 = LLVMConstNull(single_type);
689 t1 = lp_build_interleave2_half(gallivm, single_type_lp, src2, src3, 0);
690 t3 = lp_build_interleave2_half(gallivm, single_type_lp, src2, src3, 1);
691
692 /* Cast to double width type for second interleave */
693 t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1");
694 t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3");
695 }
696
697 if (!t0)
698 t0 = double_type_zero;
699 if (!t1)
700 t1 = double_type_zero;
701 if (!t2)
702 t2 = double_type_zero;
703 if (!t3)
704 t3 = double_type_zero;
705
706 /* Interleave xy, zw -> xyzw */
707 dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0);
708 dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1);
709 dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0);
710 dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1);
711
712 /* Cast back to original single width type */
713 dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0");
714 dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1");
715 dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2");
716 dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3");
717 }
718
719
720 /**
721 * Transpose from AOS <-> SOA for num_srcs
722 */
723 void
lp_build_transpose_aos_n(struct gallivm_state * gallivm,struct lp_type type,const LLVMValueRef * src,unsigned num_srcs,LLVMValueRef * dst)724 lp_build_transpose_aos_n(struct gallivm_state *gallivm,
725 struct lp_type type,
726 const LLVMValueRef* src,
727 unsigned num_srcs,
728 LLVMValueRef* dst)
729 {
730 switch (num_srcs) {
731 case 1:
732 dst[0] = src[0];
733 break;
734
735 case 2:
736 {
737 /* Note: we must use a temporary incase src == dst */
738 LLVMValueRef lo, hi;
739
740 lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0);
741 hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1);
742
743 dst[0] = lo;
744 dst[1] = hi;
745 break;
746 }
747
748 case 4:
749 lp_build_transpose_aos(gallivm, type, src, dst);
750 break;
751
752 default:
753 assert(0);
754 }
755 }
756
757
758 /**
759 * Pack n-th element of aos values,
760 * pad out to destination size.
761 * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
762 */
763 LLVMValueRef
lp_build_pack_aos_scalars(struct gallivm_state * gallivm,struct lp_type src_type,struct lp_type dst_type,const LLVMValueRef src,unsigned channel)764 lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
765 struct lp_type src_type,
766 struct lp_type dst_type,
767 const LLVMValueRef src,
768 unsigned channel)
769 {
770 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
771 LLVMValueRef undef = LLVMGetUndef(i32t);
772 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
773 unsigned num_src = src_type.length / 4;
774 unsigned num_dst = dst_type.length;
775 unsigned i;
776
777 assert(num_src <= num_dst);
778
779 for (i = 0; i < num_src; i++) {
780 shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0);
781 }
782 for (i = num_src; i < num_dst; i++) {
783 shuffles[i] = undef;
784 }
785
786 if (num_dst == 1) {
787 return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
788 }
789 else {
790 return LLVMBuildShuffleVector(gallivm->builder, src, src,
791 LLVMConstVector(shuffles, num_dst), "");
792 }
793 }
794
795
796 /**
797 * Unpack and broadcast packed aos values consisting of only the
798 * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
799 */
800 LLVMValueRef
lp_build_unpack_broadcast_aos_scalars(struct gallivm_state * gallivm,struct lp_type src_type,struct lp_type dst_type,const LLVMValueRef src)801 lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
802 struct lp_type src_type,
803 struct lp_type dst_type,
804 const LLVMValueRef src)
805 {
806 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
807 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
808 unsigned num_dst = dst_type.length;
809 unsigned num_src = dst_type.length / 4;
810 unsigned i;
811
812 assert(num_dst / 4 <= src_type.length);
813
814 for (i = 0; i < num_src; i++) {
815 shuffles[i*4] = LLVMConstInt(i32t, i, 0);
816 shuffles[i*4+1] = LLVMConstInt(i32t, i, 0);
817 shuffles[i*4+2] = LLVMConstInt(i32t, i, 0);
818 shuffles[i*4+3] = LLVMConstInt(i32t, i, 0);
819 }
820
821 if (num_src == 1) {
822 return lp_build_extract_broadcast(gallivm, src_type, dst_type,
823 src, shuffles[0]);
824 }
825 else {
826 return LLVMBuildShuffleVector(gallivm->builder, src, src,
827 LLVMConstVector(shuffles, num_dst), "");
828 }
829 }
830
831