• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * @file
30  * Helper functions for swizzling/shuffling.
31  *
32  * @author Jose Fonseca <jfonseca@vmware.com>
33  */
34 
35 #include <inttypes.h>  /* for PRIx64 macro */
36 #include "util/compiler.h"
37 #include "util/u_debug.h"
38 
39 #include "lp_bld_type.h"
40 #include "lp_bld_const.h"
41 #include "lp_bld_init.h"
42 #include "lp_bld_logic.h"
43 #include "lp_bld_swizzle.h"
44 #include "lp_bld_pack.h"
45 
46 
47 LLVMValueRef
lp_build_broadcast(struct gallivm_state * gallivm,LLVMTypeRef vec_type,LLVMValueRef scalar)48 lp_build_broadcast(struct gallivm_state *gallivm,
49                    LLVMTypeRef vec_type,
50                    LLVMValueRef scalar)
51 {
52    LLVMValueRef res;
53 
54    if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
55       /* scalar */
56       assert(vec_type == LLVMTypeOf(scalar));
57       res = scalar;
58    } else {
59       LLVMBuilderRef builder = gallivm->builder;
60       const unsigned length = LLVMGetVectorSize(vec_type);
61       LLVMValueRef undef = LLVMGetUndef(vec_type);
62       /* The shuffle vector is always made of int32 elements */
63       LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
64       LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
65 
66       assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));
67 
68       res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
69       res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
70    }
71 
72    return res;
73 }
74 
75 
76 /**
77  * Broadcast
78  */
79 LLVMValueRef
lp_build_broadcast_scalar(struct lp_build_context * bld,LLVMValueRef scalar)80 lp_build_broadcast_scalar(struct lp_build_context *bld,
81                           LLVMValueRef scalar)
82 {
83    assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));
84 
85    return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
86 }
87 
88 
89 /**
90  * Combined extract and broadcast (mere shuffle in most cases)
91  */
92 LLVMValueRef
lp_build_extract_broadcast(struct gallivm_state * gallivm,struct lp_type src_type,struct lp_type dst_type,LLVMValueRef vector,LLVMValueRef index)93 lp_build_extract_broadcast(struct gallivm_state *gallivm,
94                            struct lp_type src_type,
95                            struct lp_type dst_type,
96                            LLVMValueRef vector,
97                            LLVMValueRef index)
98 {
99    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
100    LLVMValueRef res;
101 
102    assert(src_type.floating == dst_type.floating);
103    assert(src_type.width    == dst_type.width);
104 
105    assert(lp_check_value(src_type, vector));
106    assert(LLVMTypeOf(index) == i32t);
107 
108    if (src_type.length == 1) {
109       if (dst_type.length == 1) {
110          /*
111           * Trivial scalar -> scalar.
112           */
113          res = vector;
114       } else {
115          /*
116           * Broadcast scalar -> vector.
117           */
118          res = lp_build_broadcast(gallivm,
119                                   lp_build_vec_type(gallivm, dst_type),
120                                   vector);
121       }
122    } else {
123       if (dst_type.length > 1) {
124          /*
125           * shuffle - result can be of different length.
126           */
127          LLVMValueRef shuffle;
128          shuffle = lp_build_broadcast(gallivm,
129                                       LLVMVectorType(i32t, dst_type.length),
130                                       index);
131          res = LLVMBuildShuffleVector(gallivm->builder, vector,
132                                       LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
133                                       shuffle, "");
134       } else {
135          /*
136           * Trivial extract scalar from vector.
137           */
138           res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
139       }
140    }
141 
142    return res;
143 }
144 
145 
146 /**
147  * Swizzle one channel into other channels.
148  */
149 LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context * bld,LLVMValueRef a,unsigned channel,unsigned num_channels)150 lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
151                             LLVMValueRef a,
152                             unsigned channel,
153                             unsigned num_channels)
154 {
155    LLVMBuilderRef builder = bld->gallivm->builder;
156    const struct lp_type type = bld->type;
157    const unsigned n = type.length;
158 
159    if (a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
160       return a;
161 
162    assert(num_channels == 2 || num_channels == 4);
163 
164    /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
165     * using shuffles here actually causes worst results. More investigation is
166     * needed. */
167    if (LLVMIsConstant(a) || type.width >= 16) {
168       /*
169        * Shuffle.
170        */
171       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
172       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
173 
174       for (unsigned j = 0; j < n; j += num_channels)
175          for (unsigned i = 0; i < num_channels; ++i)
176             shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
177 
178       return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
179    } else if (num_channels == 2) {
180       /*
181        * Bit mask and shifts
182        *
183        *   XY XY .... XY  <= input
184        *   0Y 0Y .... 0Y
185        *   YY YY .... YY
186        *   YY YY .... YY  <= output
187        */
188       struct lp_type type2;
189       LLVMValueRef tmp = NULL;
190       int shift;
191 
192       a = LLVMBuildAnd(builder, a,
193                        lp_build_const_mask_aos(bld->gallivm,
194                                                type, 1 << channel, num_channels), "");
195 
196       type2 = type;
197       type2.floating = FALSE;
198       type2.width *= 2;
199       type2.length /= 2;
200 
201       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");
202 
203       /*
204        * Vector element 0 is always channel X.
205        *
206        *                        76 54 32 10 (array numbering)
207        * Little endian reg in:  YX YX YX YX
208        * Little endian reg out: YY YY YY YY if shift right (shift == -1)
209        *                        XX XX XX XX if shift left (shift == 1)
210        *
211        *                        01 23 45 67 (array numbering)
212        * Big endian reg in:     XY XY XY XY
213        * Big endian reg out:    YY YY YY YY if shift left (shift == 1)
214        *                        XX XX XX XX if shift right (shift == -1)
215        *
216        */
217 #if UTIL_ARCH_LITTLE_ENDIAN
218       shift = channel == 0 ? 1 : -1;
219 #else
220       shift = channel == 0 ? -1 : 1;
221 #endif
222 
223       if (shift > 0) {
224          tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
225       } else if (shift < 0) {
226          tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
227       }
228 
229       assert(tmp);
230       if (tmp) {
231          a = LLVMBuildOr(builder, a, tmp, "");
232       }
233 
234       return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
235    } else {
236       /*
237        * Bit mask and recursive shifts
238        *
239        * Little-endian registers:
240        *
241        *   7654 3210
242        *   WZYX WZYX .... WZYX  <= input
243        *   00Y0 00Y0 .... 00Y0  <= mask
244        *   00YY 00YY .... 00YY  <= shift right 1 (shift amount -1)
245        *   YYYY YYYY .... YYYY  <= shift left 2 (shift amount 2)
246        *
247        * Big-endian registers:
248        *
249        *   0123 4567
250        *   XYZW XYZW .... XYZW  <= input
251        *   0Y00 0Y00 .... 0Y00  <= mask
252        *   YY00 YY00 .... YY00  <= shift left 1 (shift amount 1)
253        *   YYYY YYYY .... YYYY  <= shift right 2 (shift amount -2)
254        *
255        * shifts[] gives little-endian shift amounts; we need to negate for big-endian.
256        */
257       static const int shifts[4][2] = {
258          { 1,  2},
259          {-1,  2},
260          { 1, -2},
261          {-1, -2}
262       };
263 
264       a = LLVMBuildAnd(builder, a,
265                        lp_build_const_mask_aos(bld->gallivm,
266                                                type, 1 << channel, 4), "");
267 
268       /*
269        * Build a type where each element is an integer that cover the four
270        * channels.
271        */
272 
273       struct lp_type type4 = type;
274       type4.floating = FALSE;
275       type4.width *= 4;
276       type4.length /= 4;
277 
278       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
279 
280       for (unsigned i = 0; i < 2; ++i) {
281          LLVMValueRef tmp = NULL;
282          int shift = shifts[channel][i];
283 
284          /* See endianness diagram above */
285 #if UTIL_ARCH_BIG_ENDIAN
286          shift = -shift;
287 #endif
288 
289          if (shift > 0)
290             tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
291          if (shift < 0)
292             tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
293 
294          assert(tmp);
295          if (tmp)
296             a = LLVMBuildOr(builder, a, tmp, "");
297       }
298 
299       return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
300    }
301 }
302 
303 
304 /**
305  * Swizzle a vector consisting of an array of XYZW structs.
306  *
307  * This fills a vector of dst_len length with the swizzled channels from src.
308  *
309  * e.g. with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in
310  *      RGBA RGBA = BGR BGR BG
311  *
312  * @param swizzles        the swizzle array
313  * @param num_swizzles    the number of elements in swizzles
314  * @param dst_len         the length of the result
315  */
316 LLVMValueRef
lp_build_swizzle_aos_n(struct gallivm_state * gallivm,LLVMValueRef src,const unsigned char * swizzles,unsigned num_swizzles,unsigned dst_len)317 lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
318                        LLVMValueRef src,
319                        const unsigned char* swizzles,
320                        unsigned num_swizzles,
321                        unsigned dst_len)
322 {
323    LLVMBuilderRef builder = gallivm->builder;
324    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
325 
326    assert(dst_len < LP_MAX_VECTOR_WIDTH);
327 
328    for (unsigned i = 0; i < dst_len; ++i) {
329       int swizzle = swizzles[i % num_swizzles];
330 
331       if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
332          shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
333       } else {
334          shuffles[i] = lp_build_const_int32(gallivm, swizzle);
335       }
336    }
337 
338    return LLVMBuildShuffleVector(builder, src,
339                                  LLVMGetUndef(LLVMTypeOf(src)),
340                                  LLVMConstVector(shuffles, dst_len), "");
341 }
342 
343 
344 LLVMValueRef
lp_build_swizzle_aos(struct lp_build_context * bld,LLVMValueRef a,const unsigned char swizzles[4])345 lp_build_swizzle_aos(struct lp_build_context *bld,
346                      LLVMValueRef a,
347                      const unsigned char swizzles[4])
348 {
349    LLVMBuilderRef builder = bld->gallivm->builder;
350    const struct lp_type type = bld->type;
351    const unsigned n = type.length;
352 
353    if (swizzles[0] == PIPE_SWIZZLE_X &&
354        swizzles[1] == PIPE_SWIZZLE_Y &&
355        swizzles[2] == PIPE_SWIZZLE_Z &&
356        swizzles[3] == PIPE_SWIZZLE_W) {
357       return a;
358    }
359 
360    if (swizzles[0] == swizzles[1] &&
361        swizzles[1] == swizzles[2] &&
362        swizzles[2] == swizzles[3]) {
363       switch (swizzles[0]) {
364       case PIPE_SWIZZLE_X:
365       case PIPE_SWIZZLE_Y:
366       case PIPE_SWIZZLE_Z:
367       case PIPE_SWIZZLE_W:
368          return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
369       case PIPE_SWIZZLE_0:
370          return bld->zero;
371       case PIPE_SWIZZLE_1:
372          return bld->one;
373       case LP_BLD_SWIZZLE_DONTCARE:
374          return bld->undef;
375       default:
376          assert(0);
377          return bld->undef;
378       }
379    }
380 
381    if (LLVMIsConstant(a) ||
382        type.width >= 16) {
383       /*
384        * Shuffle.
385        */
386       LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
387       LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
388       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
389       LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
390 
391       memset(aux, 0, sizeof aux);
392 
393       for (unsigned j = 0; j < n; j += 4) {
394          for (unsigned i = 0; i < 4; ++i) {
395             unsigned shuffle;
396             switch (swizzles[i]) {
397             default:
398                assert(0);
399             case PIPE_SWIZZLE_X:
400             case PIPE_SWIZZLE_Y:
401             case PIPE_SWIZZLE_Z:
402             case PIPE_SWIZZLE_W:
403                shuffle = j + swizzles[i];
404                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
405                break;
406             case PIPE_SWIZZLE_0:
407                shuffle = type.length + 0;
408                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
409                if (!aux[0]) {
410                   aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
411                }
412                break;
413             case PIPE_SWIZZLE_1:
414                shuffle = type.length + 1;
415                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
416                if (!aux[1]) {
417                   aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
418                }
419                break;
420             case LP_BLD_SWIZZLE_DONTCARE:
421                shuffles[j + i] = LLVMGetUndef(i32t);
422                break;
423             }
424          }
425       }
426 
427       for (unsigned i = 0; i < n; ++i) {
428          if (!aux[i]) {
429             aux[i] = undef;
430          }
431       }
432 
433       return LLVMBuildShuffleVector(builder, a,
434                                     LLVMConstVector(aux, n),
435                                     LLVMConstVector(shuffles, n), "");
436    } else {
437       /*
438        * Bit mask and shifts.
439        *
440        * For example, this will convert BGRA to RGBA by doing
441        *
442        * Little endian:
443        *   rgba = (bgra & 0x00ff0000) >> 16
444        *        | (bgra & 0xff00ff00)
445        *        | (bgra & 0x000000ff) << 16
446        *
447        * Big endian:A
448        *   rgba = (bgra & 0x0000ff00) << 16
449        *        | (bgra & 0x00ff00ff)
450        *        | (bgra & 0xff000000) >> 16
451        *
452        * This is necessary not only for faster cause, but because X86 backend
453        * will refuse shuffles of <4 x i8> vectors
454        */
455 
456       /*
457        * Start with a mixture of 1 and 0.
458        */
459       unsigned cond = 0;
460       for (unsigned chan = 0; chan < 4; ++chan) {
461          if (swizzles[chan] == PIPE_SWIZZLE_1) {
462             cond |= 1 << chan;
463          }
464       }
465       LLVMValueRef res =
466          lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);
467 
468       /*
469        * Build a type where each element is an integer that cover the four
470        * channels.
471        */
472       struct lp_type type4 = type;
473       type4.floating = FALSE;
474       type4.width *= 4;
475       type4.length /= 4;
476 
477       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
478       res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");
479 
480       /*
481        * Mask and shift the channels, trying to group as many channels in the
482        * same shift as possible.  The shift amount is positive for shifts left
483        * and negative for shifts right.
484        */
485       for (int shift = -3; shift <= 3; ++shift) {
486          uint64_t mask = 0;
487 
488          assert(type4.width <= sizeof(mask)*8);
489 
490          /*
491           * Vector element numbers follow the XYZW order, so 0 is always X,
492           * etc.  After widening 4 times we have:
493           *
494           *                                3210
495           * Little-endian register layout: WZYX
496           *
497           *                                0123
498           * Big-endian register layout:    XYZW
499           *
500           * For little-endian, higher-numbered channels are obtained by a
501           * shift right (negative shift amount) and lower-numbered channels by
502           * a shift left (positive shift amount).  The opposite is true for
503           * big-endian.
504           */
505          for (unsigned chan = 0; chan < 4; ++chan) {
506             if (swizzles[chan] < 4) {
507                /* We need to move channel swizzles[chan] into channel chan */
508 #if UTIL_ARCH_LITTLE_ENDIAN
509                if (swizzles[chan] - chan == -shift) {
510                   mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
511                }
512 #else
513                if (swizzles[chan] - chan == shift) {
514                   mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width);
515                }
516 #endif
517             }
518          }
519 
520          if (mask) {
521             LLVMValueRef masked;
522             LLVMValueRef shifted;
523             if (0)
524                debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask);
525 
526             masked = LLVMBuildAnd(builder, a,
527                                   lp_build_const_int_vec(bld->gallivm, type4, mask), "");
528             if (shift > 0) {
529                shifted = LLVMBuildShl(builder, masked,
530                                       lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
531             } else if (shift < 0) {
532                shifted = LLVMBuildLShr(builder, masked,
533                                        lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
534             } else {
535                shifted = masked;
536             }
537 
538             res = LLVMBuildOr(builder, res, shifted, "");
539          }
540       }
541 
542       return LLVMBuildBitCast(builder, res,
543                               lp_build_vec_type(bld->gallivm, type), "");
544    }
545 }
546 
547 
548 /**
549  * Extended swizzle of a single channel of a SoA vector.
550  *
551  * @param bld         building context
552  * @param unswizzled  array with the 4 unswizzled values
553  * @param swizzle     one of the PIPE_SWIZZLE_*
554  *
555  * @return  the swizzled value.
556  */
557 LLVMValueRef
lp_build_swizzle_soa_channel(struct lp_build_context * bld,const LLVMValueRef * unswizzled,enum pipe_swizzle swizzle)558 lp_build_swizzle_soa_channel(struct lp_build_context *bld,
559                              const LLVMValueRef *unswizzled,
560                              enum pipe_swizzle swizzle)
561 {
562    switch (swizzle) {
563    case PIPE_SWIZZLE_X:
564    case PIPE_SWIZZLE_Y:
565    case PIPE_SWIZZLE_Z:
566    case PIPE_SWIZZLE_W:
567       return unswizzled[swizzle];
568    case PIPE_SWIZZLE_0:
569       return bld->zero;
570    case PIPE_SWIZZLE_1:
571       return bld->one;
572    default:
573       assert(0);
574       return bld->undef;
575    }
576 }
577 
578 
579 /**
580  * Extended swizzle of a SoA vector.
581  *
582  * @param bld         building context
583  * @param unswizzled  array with the 4 unswizzled values
584  * @param swizzles    array of PIPE_SWIZZLE_*
585  * @param swizzled    output swizzled values
586  */
587 void
lp_build_swizzle_soa(struct lp_build_context * bld,const LLVMValueRef * unswizzled,const unsigned char swizzles[4],LLVMValueRef * swizzled)588 lp_build_swizzle_soa(struct lp_build_context *bld,
589                      const LLVMValueRef *unswizzled,
590                      const unsigned char swizzles[4],
591                      LLVMValueRef *swizzled)
592 {
593    for (unsigned chan = 0; chan < 4; ++chan) {
594       swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled,
595                                                     swizzles[chan]);
596    }
597 }
598 
599 
600 /**
601  * Do an extended swizzle of a SoA vector inplace.
602  *
603  * @param bld         building context
604  * @param values      intput/output array with the 4 values
605  * @param swizzles    array of PIPE_SWIZZLE_*
606  */
607 void
lp_build_swizzle_soa_inplace(struct lp_build_context * bld,LLVMValueRef * values,const unsigned char swizzles[4])608 lp_build_swizzle_soa_inplace(struct lp_build_context *bld,
609                              LLVMValueRef *values,
610                              const unsigned char swizzles[4])
611 {
612    LLVMValueRef unswizzled[4];
613 
614    for (unsigned chan = 0; chan < 4; ++chan) {
615       unswizzled[chan] = values[chan];
616    }
617 
618    lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
619 }
620 
621 
622 /**
623  * Transpose from AOS <-> SOA
624  *
625  * @param single_type_lp   type of pixels
626  * @param src              the 4 * n pixel input
627  * @param dst              the 4 * n pixel output
628  */
629 void
lp_build_transpose_aos(struct gallivm_state * gallivm,struct lp_type single_type_lp,const LLVMValueRef src[4],LLVMValueRef dst[4])630 lp_build_transpose_aos(struct gallivm_state *gallivm,
631                        struct lp_type single_type_lp,
632                        const LLVMValueRef src[4],
633                        LLVMValueRef dst[4])
634 {
635    struct lp_type double_type_lp = single_type_lp;
636    double_type_lp.length >>= 1;
637    double_type_lp.width  <<= 1;
638 
639    LLVMTypeRef double_type = lp_build_vec_type(gallivm, double_type_lp);
640    LLVMTypeRef single_type = lp_build_vec_type(gallivm, single_type_lp);
641 
642    LLVMValueRef double_type_zero = LLVMConstNull(double_type);
643    LLVMValueRef t0 = NULL, t1 = NULL, t2 = NULL, t3 = NULL;
644 
645    /* Interleave x, y, z, w -> xy and zw */
646    if (src[0] || src[1]) {
647       LLVMValueRef src0 = src[0];
648       LLVMValueRef src1 = src[1];
649       if (!src0)
650          src0 = LLVMConstNull(single_type);
651       if (!src1)
652          src1 = LLVMConstNull(single_type);
653       t0 = lp_build_interleave2_half(gallivm, single_type_lp, src0, src1, 0);
654       t2 = lp_build_interleave2_half(gallivm, single_type_lp, src0, src1, 1);
655 
656       /* Cast to double width type for second interleave */
657       t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0");
658       t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2");
659    }
660    if (src[2] || src[3]) {
661       LLVMValueRef src2 = src[2];
662       LLVMValueRef src3 = src[3];
663       if (!src2)
664          src2 = LLVMConstNull(single_type);
665       if (!src3)
666          src3 = LLVMConstNull(single_type);
667       t1 = lp_build_interleave2_half(gallivm, single_type_lp, src2, src3, 0);
668       t3 = lp_build_interleave2_half(gallivm, single_type_lp, src2, src3, 1);
669 
670       /* Cast to double width type for second interleave */
671       t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1");
672       t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3");
673    }
674 
675    if (!t0)
676       t0 = double_type_zero;
677    if (!t1)
678       t1 = double_type_zero;
679    if (!t2)
680       t2 = double_type_zero;
681    if (!t3)
682       t3 = double_type_zero;
683 
684    /* Interleave xy, zw -> xyzw */
685    dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0);
686    dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1);
687    dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0);
688    dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1);
689 
690    /* Cast back to original single width type */
691    dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0");
692    dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1");
693    dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2");
694    dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3");
695 }
696 
697 
698 /**
699  * Transpose from AOS <-> SOA for num_srcs
700  */
701 void
lp_build_transpose_aos_n(struct gallivm_state * gallivm,struct lp_type type,const LLVMValueRef * src,unsigned num_srcs,LLVMValueRef * dst)702 lp_build_transpose_aos_n(struct gallivm_state *gallivm,
703                          struct lp_type type,
704                          const LLVMValueRef* src,
705                          unsigned num_srcs,
706                          LLVMValueRef* dst)
707 {
708    switch (num_srcs) {
709    case 1:
710       dst[0] = src[0];
711       break;
712    case 2:
713    {
714       /* Note: we must use a temporary incase src == dst */
715       LLVMValueRef lo, hi;
716 
717       lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0);
718       hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1);
719 
720       dst[0] = lo;
721       dst[1] = hi;
722       break;
723    }
724    case 4:
725       lp_build_transpose_aos(gallivm, type, src, dst);
726       break;
727    default:
728       assert(0);
729    }
730 }
731 
732 
733 /**
734  * Pack n-th element of aos values,
735  * pad out to destination size.
736  * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
737  */
738 LLVMValueRef
lp_build_pack_aos_scalars(struct gallivm_state * gallivm,struct lp_type src_type,struct lp_type dst_type,const LLVMValueRef src,unsigned channel)739 lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
740                           struct lp_type src_type,
741                           struct lp_type dst_type,
742                           const LLVMValueRef src,
743                           unsigned channel)
744 {
745    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
746    LLVMValueRef undef = LLVMGetUndef(i32t);
747    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
748    unsigned num_src = src_type.length / 4;
749    unsigned num_dst = dst_type.length;
750 
751    assert(num_src <= num_dst);
752 
753    for (unsigned i = 0; i < num_src; i++) {
754       shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0);
755    }
756    for (unsigned i = num_src; i < num_dst; i++) {
757       shuffles[i] = undef;
758    }
759 
760    if (num_dst == 1) {
761       return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
762    }
763    else {
764       return LLVMBuildShuffleVector(gallivm->builder, src, src,
765                                     LLVMConstVector(shuffles, num_dst), "");
766    }
767 }
768 
769 
770 /**
771  * Unpack and broadcast packed aos values consisting of only the
772  * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
773  */
774 LLVMValueRef
lp_build_unpack_broadcast_aos_scalars(struct gallivm_state * gallivm,struct lp_type src_type,struct lp_type dst_type,const LLVMValueRef src)775 lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
776                                       struct lp_type src_type,
777                                       struct lp_type dst_type,
778                                       const LLVMValueRef src)
779 {
780    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
781    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
782    unsigned num_dst = dst_type.length;
783    unsigned num_src = dst_type.length / 4;
784 
785    assert(num_dst / 4 <= src_type.length);
786 
787    for (unsigned i = 0; i < num_src; i++) {
788       shuffles[i*4] = LLVMConstInt(i32t, i, 0);
789       shuffles[i*4+1] = LLVMConstInt(i32t, i, 0);
790       shuffles[i*4+2] = LLVMConstInt(i32t, i, 0);
791       shuffles[i*4+3] = LLVMConstInt(i32t, i, 0);
792    }
793 
794    if (num_src == 1) {
795       return lp_build_extract_broadcast(gallivm, src_type, dst_type,
796                                         src, shuffles[0]);
797    } else {
798       return LLVMBuildShuffleVector(gallivm->builder, src, src,
799                                     LLVMConstVector(shuffles, num_dst), "");
800    }
801 }
802 
803