1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Helper functions for swizzling/shuffling.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35 #include <inttypes.h> /* for PRIx64 macro */
36 #include "util/compiler.h"
37 #include "util/u_debug.h"
38
39 #include "lp_bld_type.h"
40 #include "lp_bld_const.h"
41 #include "lp_bld_init.h"
42 #include "lp_bld_logic.h"
43 #include "lp_bld_swizzle.h"
44 #include "lp_bld_pack.h"
45
46
47 LLVMValueRef
lp_build_broadcast(struct gallivm_state * gallivm,LLVMTypeRef vec_type,LLVMValueRef scalar)48 lp_build_broadcast(struct gallivm_state *gallivm,
49 LLVMTypeRef vec_type,
50 LLVMValueRef scalar)
51 {
52 LLVMValueRef res;
53
54 if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
55 /* scalar */
56 assert(vec_type == LLVMTypeOf(scalar));
57 res = scalar;
58 } else {
59 LLVMBuilderRef builder = gallivm->builder;
60 const unsigned length = LLVMGetVectorSize(vec_type);
61 LLVMValueRef undef = LLVMGetUndef(vec_type);
62 /* The shuffle vector is always made of int32 elements */
63 LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
64 LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
65
66 assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));
67
68 res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
69 res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
70 }
71
72 return res;
73 }
74
75
76 /**
77 * Broadcast
78 */
79 LLVMValueRef
lp_build_broadcast_scalar(struct lp_build_context * bld,LLVMValueRef scalar)80 lp_build_broadcast_scalar(struct lp_build_context *bld,
81 LLVMValueRef scalar)
82 {
83 assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));
84
85 return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
86 }
87
88
89 /**
90 * Combined extract and broadcast (mere shuffle in most cases)
91 */
92 LLVMValueRef
lp_build_extract_broadcast(struct gallivm_state * gallivm,struct lp_type src_type,struct lp_type dst_type,LLVMValueRef vector,LLVMValueRef index)93 lp_build_extract_broadcast(struct gallivm_state *gallivm,
94 struct lp_type src_type,
95 struct lp_type dst_type,
96 LLVMValueRef vector,
97 LLVMValueRef index)
98 {
99 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
100 LLVMValueRef res;
101
102 assert(src_type.floating == dst_type.floating);
103 assert(src_type.width == dst_type.width);
104
105 assert(lp_check_value(src_type, vector));
106 assert(LLVMTypeOf(index) == i32t);
107
108 if (src_type.length == 1) {
109 if (dst_type.length == 1) {
110 /*
111 * Trivial scalar -> scalar.
112 */
113 res = vector;
114 } else {
115 /*
116 * Broadcast scalar -> vector.
117 */
118 res = lp_build_broadcast(gallivm,
119 lp_build_vec_type(gallivm, dst_type),
120 vector);
121 }
122 } else {
123 if (dst_type.length > 1) {
124 /*
125 * shuffle - result can be of different length.
126 */
127 LLVMValueRef shuffle;
128 shuffle = lp_build_broadcast(gallivm,
129 LLVMVectorType(i32t, dst_type.length),
130 index);
131 res = LLVMBuildShuffleVector(gallivm->builder, vector,
132 LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
133 shuffle, "");
134 } else {
135 /*
136 * Trivial extract scalar from vector.
137 */
138 res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
139 }
140 }
141
142 return res;
143 }
144
145
146 /**
147 * Swizzle one channel into other channels.
148 */
149 LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context * bld,LLVMValueRef a,unsigned channel,unsigned num_channels)150 lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
151 LLVMValueRef a,
152 unsigned channel,
153 unsigned num_channels)
154 {
155 LLVMBuilderRef builder = bld->gallivm->builder;
156 const struct lp_type type = bld->type;
157 const unsigned n = type.length;
158
159 if (a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
160 return a;
161
162 assert(num_channels == 2 || num_channels == 4);
163
164 /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
165 * using shuffles here actually causes worst results. More investigation is
166 * needed. */
167 if (LLVMIsConstant(a) || type.width >= 16) {
168 /*
169 * Shuffle.
170 */
171 LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
172 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
173
174 for (unsigned j = 0; j < n; j += num_channels)
175 for (unsigned i = 0; i < num_channels; ++i)
176 shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
177
178 return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
179 } else if (num_channels == 2) {
180 /*
181 * Bit mask and shifts
182 *
183 * XY XY .... XY <= input
184 * 0Y 0Y .... 0Y
185 * YY YY .... YY
186 * YY YY .... YY <= output
187 */
188 struct lp_type type2;
189 LLVMValueRef tmp = NULL;
190 int shift;
191
192 a = LLVMBuildAnd(builder, a,
193 lp_build_const_mask_aos(bld->gallivm,
194 type, 1 << channel, num_channels), "");
195
196 type2 = type;
197 type2.floating = FALSE;
198 type2.width *= 2;
199 type2.length /= 2;
200
201 a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");
202
203 /*
204 * Vector element 0 is always channel X.
205 *
206 * 76 54 32 10 (array numbering)
207 * Little endian reg in: YX YX YX YX
208 * Little endian reg out: YY YY YY YY if shift right (shift == -1)
209 * XX XX XX XX if shift left (shift == 1)
210 *
211 * 01 23 45 67 (array numbering)
212 * Big endian reg in: XY XY XY XY
213 * Big endian reg out: YY YY YY YY if shift left (shift == 1)
214 * XX XX XX XX if shift right (shift == -1)
215 *
216 */
217 #if UTIL_ARCH_LITTLE_ENDIAN
218 shift = channel == 0 ? 1 : -1;
219 #else
220 shift = channel == 0 ? -1 : 1;
221 #endif
222
223 if (shift > 0) {
224 tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
225 } else if (shift < 0) {
226 tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
227 }
228
229 assert(tmp);
230 if (tmp) {
231 a = LLVMBuildOr(builder, a, tmp, "");
232 }
233
234 return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
235 } else {
236 /*
237 * Bit mask and recursive shifts
238 *
239 * Little-endian registers:
240 *
241 * 7654 3210
242 * WZYX WZYX .... WZYX <= input
243 * 00Y0 00Y0 .... 00Y0 <= mask
244 * 00YY 00YY .... 00YY <= shift right 1 (shift amount -1)
245 * YYYY YYYY .... YYYY <= shift left 2 (shift amount 2)
246 *
247 * Big-endian registers:
248 *
249 * 0123 4567
250 * XYZW XYZW .... XYZW <= input
251 * 0Y00 0Y00 .... 0Y00 <= mask
252 * YY00 YY00 .... YY00 <= shift left 1 (shift amount 1)
253 * YYYY YYYY .... YYYY <= shift right 2 (shift amount -2)
254 *
255 * shifts[] gives little-endian shift amounts; we need to negate for big-endian.
256 */
257 static const int shifts[4][2] = {
258 { 1, 2},
259 {-1, 2},
260 { 1, -2},
261 {-1, -2}
262 };
263
264 a = LLVMBuildAnd(builder, a,
265 lp_build_const_mask_aos(bld->gallivm,
266 type, 1 << channel, 4), "");
267
268 /*
269 * Build a type where each element is an integer that cover the four
270 * channels.
271 */
272
273 struct lp_type type4 = type;
274 type4.floating = FALSE;
275 type4.width *= 4;
276 type4.length /= 4;
277
278 a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
279
280 for (unsigned i = 0; i < 2; ++i) {
281 LLVMValueRef tmp = NULL;
282 int shift = shifts[channel][i];
283
284 /* See endianness diagram above */
285 #if UTIL_ARCH_BIG_ENDIAN
286 shift = -shift;
287 #endif
288
289 if (shift > 0)
290 tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
291 if (shift < 0)
292 tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
293
294 assert(tmp);
295 if (tmp)
296 a = LLVMBuildOr(builder, a, tmp, "");
297 }
298
299 return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
300 }
301 }
302
303
304 /**
305 * Swizzle a vector consisting of an array of XYZW structs.
306 *
307 * This fills a vector of dst_len length with the swizzled channels from src.
308 *
309 * e.g. with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in
310 * RGBA RGBA = BGR BGR BG
311 *
312 * @param swizzles the swizzle array
313 * @param num_swizzles the number of elements in swizzles
314 * @param dst_len the length of the result
315 */
316 LLVMValueRef
lp_build_swizzle_aos_n(struct gallivm_state * gallivm,LLVMValueRef src,const unsigned char * swizzles,unsigned num_swizzles,unsigned dst_len)317 lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
318 LLVMValueRef src,
319 const unsigned char* swizzles,
320 unsigned num_swizzles,
321 unsigned dst_len)
322 {
323 LLVMBuilderRef builder = gallivm->builder;
324 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
325
326 assert(dst_len < LP_MAX_VECTOR_WIDTH);
327
328 for (unsigned i = 0; i < dst_len; ++i) {
329 int swizzle = swizzles[i % num_swizzles];
330
331 if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
332 shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
333 } else {
334 shuffles[i] = lp_build_const_int32(gallivm, swizzle);
335 }
336 }
337
338 return LLVMBuildShuffleVector(builder, src,
339 LLVMGetUndef(LLVMTypeOf(src)),
340 LLVMConstVector(shuffles, dst_len), "");
341 }
342
343
344 LLVMValueRef
lp_build_swizzle_aos(struct lp_build_context * bld,LLVMValueRef a,const unsigned char swizzles[4])345 lp_build_swizzle_aos(struct lp_build_context *bld,
346 LLVMValueRef a,
347 const unsigned char swizzles[4])
348 {
349 LLVMBuilderRef builder = bld->gallivm->builder;
350 const struct lp_type type = bld->type;
351 const unsigned n = type.length;
352
353 if (swizzles[0] == PIPE_SWIZZLE_X &&
354 swizzles[1] == PIPE_SWIZZLE_Y &&
355 swizzles[2] == PIPE_SWIZZLE_Z &&
356 swizzles[3] == PIPE_SWIZZLE_W) {
357 return a;
358 }
359
360 if (swizzles[0] == swizzles[1] &&
361 swizzles[1] == swizzles[2] &&
362 swizzles[2] == swizzles[3]) {
363 switch (swizzles[0]) {
364 case PIPE_SWIZZLE_X:
365 case PIPE_SWIZZLE_Y:
366 case PIPE_SWIZZLE_Z:
367 case PIPE_SWIZZLE_W:
368 return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
369 case PIPE_SWIZZLE_0:
370 return bld->zero;
371 case PIPE_SWIZZLE_1:
372 return bld->one;
373 case LP_BLD_SWIZZLE_DONTCARE:
374 return bld->undef;
375 default:
376 assert(0);
377 return bld->undef;
378 }
379 }
380
381 if (LLVMIsConstant(a) ||
382 type.width >= 16) {
383 /*
384 * Shuffle.
385 */
386 LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
387 LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
388 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
389 LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
390
391 memset(aux, 0, sizeof aux);
392
393 for (unsigned j = 0; j < n; j += 4) {
394 for (unsigned i = 0; i < 4; ++i) {
395 unsigned shuffle;
396 switch (swizzles[i]) {
397 default:
398 assert(0);
399 case PIPE_SWIZZLE_X:
400 case PIPE_SWIZZLE_Y:
401 case PIPE_SWIZZLE_Z:
402 case PIPE_SWIZZLE_W:
403 shuffle = j + swizzles[i];
404 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
405 break;
406 case PIPE_SWIZZLE_0:
407 shuffle = type.length + 0;
408 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
409 if (!aux[0]) {
410 aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
411 }
412 break;
413 case PIPE_SWIZZLE_1:
414 shuffle = type.length + 1;
415 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
416 if (!aux[1]) {
417 aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
418 }
419 break;
420 case LP_BLD_SWIZZLE_DONTCARE:
421 shuffles[j + i] = LLVMGetUndef(i32t);
422 break;
423 }
424 }
425 }
426
427 for (unsigned i = 0; i < n; ++i) {
428 if (!aux[i]) {
429 aux[i] = undef;
430 }
431 }
432
433 return LLVMBuildShuffleVector(builder, a,
434 LLVMConstVector(aux, n),
435 LLVMConstVector(shuffles, n), "");
436 } else {
437 /*
438 * Bit mask and shifts.
439 *
440 * For example, this will convert BGRA to RGBA by doing
441 *
442 * Little endian:
443 * rgba = (bgra & 0x00ff0000) >> 16
444 * | (bgra & 0xff00ff00)
445 * | (bgra & 0x000000ff) << 16
446 *
447 * Big endian:A
448 * rgba = (bgra & 0x0000ff00) << 16
449 * | (bgra & 0x00ff00ff)
450 * | (bgra & 0xff000000) >> 16
451 *
452 * This is necessary not only for faster cause, but because X86 backend
453 * will refuse shuffles of <4 x i8> vectors
454 */
455
456 /*
457 * Start with a mixture of 1 and 0.
458 */
459 unsigned cond = 0;
460 for (unsigned chan = 0; chan < 4; ++chan) {
461 if (swizzles[chan] == PIPE_SWIZZLE_1) {
462 cond |= 1 << chan;
463 }
464 }
465 LLVMValueRef res =
466 lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);
467
468 /*
469 * Build a type where each element is an integer that cover the four
470 * channels.
471 */
472 struct lp_type type4 = type;
473 type4.floating = FALSE;
474 type4.width *= 4;
475 type4.length /= 4;
476
477 a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
478 res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");
479
480 /*
481 * Mask and shift the channels, trying to group as many channels in the
482 * same shift as possible. The shift amount is positive for shifts left
483 * and negative for shifts right.
484 */
485 for (int shift = -3; shift <= 3; ++shift) {
486 uint64_t mask = 0;
487
488 assert(type4.width <= sizeof(mask)*8);
489
490 /*
491 * Vector element numbers follow the XYZW order, so 0 is always X,
492 * etc. After widening 4 times we have:
493 *
494 * 3210
495 * Little-endian register layout: WZYX
496 *
497 * 0123
498 * Big-endian register layout: XYZW
499 *
500 * For little-endian, higher-numbered channels are obtained by a
501 * shift right (negative shift amount) and lower-numbered channels by
502 * a shift left (positive shift amount). The opposite is true for
503 * big-endian.
504 */
505 for (unsigned chan = 0; chan < 4; ++chan) {
506 if (swizzles[chan] < 4) {
507 /* We need to move channel swizzles[chan] into channel chan */
508 #if UTIL_ARCH_LITTLE_ENDIAN
509 if (swizzles[chan] - chan == -shift) {
510 mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
511 }
512 #else
513 if (swizzles[chan] - chan == shift) {
514 mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width);
515 }
516 #endif
517 }
518 }
519
520 if (mask) {
521 LLVMValueRef masked;
522 LLVMValueRef shifted;
523 if (0)
524 debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask);
525
526 masked = LLVMBuildAnd(builder, a,
527 lp_build_const_int_vec(bld->gallivm, type4, mask), "");
528 if (shift > 0) {
529 shifted = LLVMBuildShl(builder, masked,
530 lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
531 } else if (shift < 0) {
532 shifted = LLVMBuildLShr(builder, masked,
533 lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
534 } else {
535 shifted = masked;
536 }
537
538 res = LLVMBuildOr(builder, res, shifted, "");
539 }
540 }
541
542 return LLVMBuildBitCast(builder, res,
543 lp_build_vec_type(bld->gallivm, type), "");
544 }
545 }
546
547
548 /**
549 * Extended swizzle of a single channel of a SoA vector.
550 *
551 * @param bld building context
552 * @param unswizzled array with the 4 unswizzled values
553 * @param swizzle one of the PIPE_SWIZZLE_*
554 *
555 * @return the swizzled value.
556 */
557 LLVMValueRef
lp_build_swizzle_soa_channel(struct lp_build_context * bld,const LLVMValueRef * unswizzled,enum pipe_swizzle swizzle)558 lp_build_swizzle_soa_channel(struct lp_build_context *bld,
559 const LLVMValueRef *unswizzled,
560 enum pipe_swizzle swizzle)
561 {
562 switch (swizzle) {
563 case PIPE_SWIZZLE_X:
564 case PIPE_SWIZZLE_Y:
565 case PIPE_SWIZZLE_Z:
566 case PIPE_SWIZZLE_W:
567 return unswizzled[swizzle];
568 case PIPE_SWIZZLE_0:
569 return bld->zero;
570 case PIPE_SWIZZLE_1:
571 return bld->one;
572 default:
573 assert(0);
574 return bld->undef;
575 }
576 }
577
578
579 /**
580 * Extended swizzle of a SoA vector.
581 *
582 * @param bld building context
583 * @param unswizzled array with the 4 unswizzled values
584 * @param swizzles array of PIPE_SWIZZLE_*
585 * @param swizzled output swizzled values
586 */
587 void
lp_build_swizzle_soa(struct lp_build_context * bld,const LLVMValueRef * unswizzled,const unsigned char swizzles[4],LLVMValueRef * swizzled)588 lp_build_swizzle_soa(struct lp_build_context *bld,
589 const LLVMValueRef *unswizzled,
590 const unsigned char swizzles[4],
591 LLVMValueRef *swizzled)
592 {
593 for (unsigned chan = 0; chan < 4; ++chan) {
594 swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled,
595 swizzles[chan]);
596 }
597 }
598
599
600 /**
601 * Do an extended swizzle of a SoA vector inplace.
602 *
603 * @param bld building context
604 * @param values intput/output array with the 4 values
605 * @param swizzles array of PIPE_SWIZZLE_*
606 */
607 void
lp_build_swizzle_soa_inplace(struct lp_build_context * bld,LLVMValueRef * values,const unsigned char swizzles[4])608 lp_build_swizzle_soa_inplace(struct lp_build_context *bld,
609 LLVMValueRef *values,
610 const unsigned char swizzles[4])
611 {
612 LLVMValueRef unswizzled[4];
613
614 for (unsigned chan = 0; chan < 4; ++chan) {
615 unswizzled[chan] = values[chan];
616 }
617
618 lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
619 }
620
621
622 /**
623 * Transpose from AOS <-> SOA
624 *
625 * @param single_type_lp type of pixels
626 * @param src the 4 * n pixel input
627 * @param dst the 4 * n pixel output
628 */
629 void
lp_build_transpose_aos(struct gallivm_state * gallivm,struct lp_type single_type_lp,const LLVMValueRef src[4],LLVMValueRef dst[4])630 lp_build_transpose_aos(struct gallivm_state *gallivm,
631 struct lp_type single_type_lp,
632 const LLVMValueRef src[4],
633 LLVMValueRef dst[4])
634 {
635 struct lp_type double_type_lp = single_type_lp;
636 double_type_lp.length >>= 1;
637 double_type_lp.width <<= 1;
638
639 LLVMTypeRef double_type = lp_build_vec_type(gallivm, double_type_lp);
640 LLVMTypeRef single_type = lp_build_vec_type(gallivm, single_type_lp);
641
642 LLVMValueRef double_type_zero = LLVMConstNull(double_type);
643 LLVMValueRef t0 = NULL, t1 = NULL, t2 = NULL, t3 = NULL;
644
645 /* Interleave x, y, z, w -> xy and zw */
646 if (src[0] || src[1]) {
647 LLVMValueRef src0 = src[0];
648 LLVMValueRef src1 = src[1];
649 if (!src0)
650 src0 = LLVMConstNull(single_type);
651 if (!src1)
652 src1 = LLVMConstNull(single_type);
653 t0 = lp_build_interleave2_half(gallivm, single_type_lp, src0, src1, 0);
654 t2 = lp_build_interleave2_half(gallivm, single_type_lp, src0, src1, 1);
655
656 /* Cast to double width type for second interleave */
657 t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0");
658 t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2");
659 }
660 if (src[2] || src[3]) {
661 LLVMValueRef src2 = src[2];
662 LLVMValueRef src3 = src[3];
663 if (!src2)
664 src2 = LLVMConstNull(single_type);
665 if (!src3)
666 src3 = LLVMConstNull(single_type);
667 t1 = lp_build_interleave2_half(gallivm, single_type_lp, src2, src3, 0);
668 t3 = lp_build_interleave2_half(gallivm, single_type_lp, src2, src3, 1);
669
670 /* Cast to double width type for second interleave */
671 t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1");
672 t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3");
673 }
674
675 if (!t0)
676 t0 = double_type_zero;
677 if (!t1)
678 t1 = double_type_zero;
679 if (!t2)
680 t2 = double_type_zero;
681 if (!t3)
682 t3 = double_type_zero;
683
684 /* Interleave xy, zw -> xyzw */
685 dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0);
686 dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1);
687 dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0);
688 dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1);
689
690 /* Cast back to original single width type */
691 dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0");
692 dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1");
693 dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2");
694 dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3");
695 }
696
697
698 /**
699 * Transpose from AOS <-> SOA for num_srcs
700 */
701 void
lp_build_transpose_aos_n(struct gallivm_state * gallivm,struct lp_type type,const LLVMValueRef * src,unsigned num_srcs,LLVMValueRef * dst)702 lp_build_transpose_aos_n(struct gallivm_state *gallivm,
703 struct lp_type type,
704 const LLVMValueRef* src,
705 unsigned num_srcs,
706 LLVMValueRef* dst)
707 {
708 switch (num_srcs) {
709 case 1:
710 dst[0] = src[0];
711 break;
712 case 2:
713 {
714 /* Note: we must use a temporary incase src == dst */
715 LLVMValueRef lo, hi;
716
717 lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0);
718 hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1);
719
720 dst[0] = lo;
721 dst[1] = hi;
722 break;
723 }
724 case 4:
725 lp_build_transpose_aos(gallivm, type, src, dst);
726 break;
727 default:
728 assert(0);
729 }
730 }
731
732
733 /**
734 * Pack n-th element of aos values,
735 * pad out to destination size.
736 * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
737 */
738 LLVMValueRef
lp_build_pack_aos_scalars(struct gallivm_state * gallivm,struct lp_type src_type,struct lp_type dst_type,const LLVMValueRef src,unsigned channel)739 lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
740 struct lp_type src_type,
741 struct lp_type dst_type,
742 const LLVMValueRef src,
743 unsigned channel)
744 {
745 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
746 LLVMValueRef undef = LLVMGetUndef(i32t);
747 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
748 unsigned num_src = src_type.length / 4;
749 unsigned num_dst = dst_type.length;
750
751 assert(num_src <= num_dst);
752
753 for (unsigned i = 0; i < num_src; i++) {
754 shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0);
755 }
756 for (unsigned i = num_src; i < num_dst; i++) {
757 shuffles[i] = undef;
758 }
759
760 if (num_dst == 1) {
761 return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
762 }
763 else {
764 return LLVMBuildShuffleVector(gallivm->builder, src, src,
765 LLVMConstVector(shuffles, num_dst), "");
766 }
767 }
768
769
770 /**
771 * Unpack and broadcast packed aos values consisting of only the
772 * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
773 */
774 LLVMValueRef
lp_build_unpack_broadcast_aos_scalars(struct gallivm_state * gallivm,struct lp_type src_type,struct lp_type dst_type,const LLVMValueRef src)775 lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
776 struct lp_type src_type,
777 struct lp_type dst_type,
778 const LLVMValueRef src)
779 {
780 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
781 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
782 unsigned num_dst = dst_type.length;
783 unsigned num_src = dst_type.length / 4;
784
785 assert(num_dst / 4 <= src_type.length);
786
787 for (unsigned i = 0; i < num_src; i++) {
788 shuffles[i*4] = LLVMConstInt(i32t, i, 0);
789 shuffles[i*4+1] = LLVMConstInt(i32t, i, 0);
790 shuffles[i*4+2] = LLVMConstInt(i32t, i, 0);
791 shuffles[i*4+3] = LLVMConstInt(i32t, i, 0);
792 }
793
794 if (num_src == 1) {
795 return lp_build_extract_broadcast(gallivm, src_type, dst_type,
796 src, shuffles[0]);
797 } else {
798 return LLVMBuildShuffleVector(gallivm->builder, src, src,
799 LLVMConstVector(shuffles, num_dst), "");
800 }
801 }
802
803