• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * @file
30  * Helper functions for swizzling/shuffling.
31  *
32  * @author Jose Fonseca <jfonseca@vmware.com>
33  */
34 
35 
36 #include "util/u_debug.h"
37 
38 #include "lp_bld_type.h"
39 #include "lp_bld_const.h"
40 #include "lp_bld_init.h"
41 #include "lp_bld_logic.h"
42 #include "lp_bld_swizzle.h"
43 #include "lp_bld_pack.h"
44 
45 
46 LLVMValueRef
lp_build_broadcast(struct gallivm_state * gallivm,LLVMTypeRef vec_type,LLVMValueRef scalar)47 lp_build_broadcast(struct gallivm_state *gallivm,
48                    LLVMTypeRef vec_type,
49                    LLVMValueRef scalar)
50 {
51    LLVMValueRef res;
52 
53    if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
54       /* scalar */
55       assert(vec_type == LLVMTypeOf(scalar));
56       res = scalar;
57    } else {
58       LLVMBuilderRef builder = gallivm->builder;
59       const unsigned length = LLVMGetVectorSize(vec_type);
60       LLVMValueRef undef = LLVMGetUndef(vec_type);
61       LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
62 
63       assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));
64 
65       if (HAVE_LLVM >= 0x207) {
66          /* The shuffle vector is always made of int32 elements */
67          LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
68          res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
69          res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
70       } else {
71          /* XXX: The above path provokes a bug in LLVM 2.6 */
72          unsigned i;
73          res = undef;
74          for(i = 0; i < length; ++i) {
75             LLVMValueRef index = lp_build_const_int32(gallivm, i);
76             res = LLVMBuildInsertElement(builder, res, scalar, index, "");
77          }
78       }
79    }
80 
81    return res;
82 }
83 
84 
85 /**
86  * Broadcast
87  */
88 LLVMValueRef
lp_build_broadcast_scalar(struct lp_build_context * bld,LLVMValueRef scalar)89 lp_build_broadcast_scalar(struct lp_build_context *bld,
90                           LLVMValueRef scalar)
91 {
92    assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));
93 
94    return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
95 }
96 
97 
98 /**
99  * Combined extract and broadcast (mere shuffle in most cases)
100  */
101 LLVMValueRef
lp_build_extract_broadcast(struct gallivm_state * gallivm,struct lp_type src_type,struct lp_type dst_type,LLVMValueRef vector,LLVMValueRef index)102 lp_build_extract_broadcast(struct gallivm_state *gallivm,
103                            struct lp_type src_type,
104                            struct lp_type dst_type,
105                            LLVMValueRef vector,
106                            LLVMValueRef index)
107 {
108    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
109    LLVMValueRef res;
110 
111    assert(src_type.floating == dst_type.floating);
112    assert(src_type.width    == dst_type.width);
113 
114    assert(lp_check_value(src_type, vector));
115    assert(LLVMTypeOf(index) == i32t);
116 
117    if (src_type.length == 1) {
118       if (dst_type.length == 1) {
119          /*
120           * Trivial scalar -> scalar.
121           */
122 
123          res = vector;
124       }
125       else {
126          /*
127           * Broadcast scalar -> vector.
128           */
129 
130          res = lp_build_broadcast(gallivm,
131                                   lp_build_vec_type(gallivm, dst_type),
132                                   vector);
133       }
134    }
135    else {
136       if (dst_type.length > 1) {
137          /*
138           * shuffle - result can be of different length.
139           */
140 
141          LLVMValueRef shuffle;
142          shuffle = lp_build_broadcast(gallivm,
143                                       LLVMVectorType(i32t, dst_type.length),
144                                       index);
145          res = LLVMBuildShuffleVector(gallivm->builder, vector,
146                                       LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
147                                       shuffle, "");
148       }
149       else {
150          /*
151           * Trivial extract scalar from vector.
152           */
153           res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
154       }
155    }
156 
157    return res;
158 }
159 
160 
161 /**
162  * Swizzle one channel into all other three channels.
163  */
164 LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context * bld,LLVMValueRef a,unsigned channel)165 lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
166                             LLVMValueRef a,
167                             unsigned channel)
168 {
169    LLVMBuilderRef builder = bld->gallivm->builder;
170    const struct lp_type type = bld->type;
171    const unsigned n = type.length;
172    unsigned i, j;
173 
174    if(a == bld->undef || a == bld->zero || a == bld->one)
175       return a;
176 
177    /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
178     * using shuffles here actually causes worst results. More investigation is
179     * needed. */
180    if (type.width >= 16) {
181       /*
182        * Shuffle.
183        */
184       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
185       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
186 
187       for(j = 0; j < n; j += 4)
188          for(i = 0; i < 4; ++i)
189             shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
190 
191       return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
192    }
193    else {
194       /*
195        * Bit mask and recursive shifts
196        *
197        *   XYZW XYZW .... XYZW  <= input
198        *   0Y00 0Y00 .... 0Y00
199        *   YY00 YY00 .... YY00
200        *   YYYY YYYY .... YYYY  <= output
201        */
202       struct lp_type type4;
203       const char shifts[4][2] = {
204          { 1,  2},
205          {-1,  2},
206          { 1, -2},
207          {-1, -2}
208       };
209       unsigned i;
210 
211       a = LLVMBuildAnd(builder, a,
212                        lp_build_const_mask_aos(bld->gallivm,
213                                                type, 1 << channel), "");
214 
215       /*
216        * Build a type where each element is an integer that cover the four
217        * channels.
218        */
219 
220       type4 = type;
221       type4.floating = FALSE;
222       type4.width *= 4;
223       type4.length /= 4;
224 
225       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
226 
227       for(i = 0; i < 2; ++i) {
228          LLVMValueRef tmp = NULL;
229          int shift = shifts[channel][i];
230 
231 #ifdef PIPE_ARCH_LITTLE_ENDIAN
232          shift = -shift;
233 #endif
234 
235          if(shift > 0)
236             tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
237          if(shift < 0)
238             tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
239 
240          assert(tmp);
241          if(tmp)
242             a = LLVMBuildOr(builder, a, tmp, "");
243       }
244 
245       return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
246    }
247 }
248 
249 
250 LLVMValueRef
lp_build_swizzle_aos(struct lp_build_context * bld,LLVMValueRef a,const unsigned char swizzles[4])251 lp_build_swizzle_aos(struct lp_build_context *bld,
252                      LLVMValueRef a,
253                      const unsigned char swizzles[4])
254 {
255    LLVMBuilderRef builder = bld->gallivm->builder;
256    const struct lp_type type = bld->type;
257    const unsigned n = type.length;
258    unsigned i, j;
259 
260    if (swizzles[0] == PIPE_SWIZZLE_RED &&
261        swizzles[1] == PIPE_SWIZZLE_GREEN &&
262        swizzles[2] == PIPE_SWIZZLE_BLUE &&
263        swizzles[3] == PIPE_SWIZZLE_ALPHA) {
264       return a;
265    }
266 
267    if (swizzles[0] == swizzles[1] &&
268        swizzles[1] == swizzles[2] &&
269        swizzles[2] == swizzles[3]) {
270       switch (swizzles[0]) {
271       case PIPE_SWIZZLE_RED:
272       case PIPE_SWIZZLE_GREEN:
273       case PIPE_SWIZZLE_BLUE:
274       case PIPE_SWIZZLE_ALPHA:
275          return lp_build_swizzle_scalar_aos(bld, a, swizzles[0]);
276       case PIPE_SWIZZLE_ZERO:
277          return bld->zero;
278       case PIPE_SWIZZLE_ONE:
279          return bld->one;
280       case LP_BLD_SWIZZLE_DONTCARE:
281          return bld->undef;
282       default:
283          assert(0);
284          return bld->undef;
285       }
286    }
287 
288    if (type.width >= 16) {
289       /*
290        * Shuffle.
291        */
292       LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
293       LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
294       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
295       LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
296 
297       memset(aux, 0, sizeof aux);
298 
299       for(j = 0; j < n; j += 4) {
300          for(i = 0; i < 4; ++i) {
301             unsigned shuffle;
302             switch (swizzles[i]) {
303             default:
304                assert(0);
305                /* fall through */
306             case PIPE_SWIZZLE_RED:
307             case PIPE_SWIZZLE_GREEN:
308             case PIPE_SWIZZLE_BLUE:
309             case PIPE_SWIZZLE_ALPHA:
310                shuffle = j + swizzles[i];
311                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
312                break;
313             case PIPE_SWIZZLE_ZERO:
314                shuffle = type.length + 0;
315                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
316                if (!aux[0]) {
317                   aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
318                }
319                break;
320             case PIPE_SWIZZLE_ONE:
321                shuffle = type.length + 1;
322                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
323                if (!aux[1]) {
324                   aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
325                }
326                break;
327             case LP_BLD_SWIZZLE_DONTCARE:
328                shuffles[j + i] = LLVMGetUndef(i32t);
329                break;
330             }
331          }
332       }
333 
334       for (i = 0; i < n; ++i) {
335          if (!aux[i]) {
336             aux[i] = undef;
337          }
338       }
339 
340       return LLVMBuildShuffleVector(builder, a,
341                                     LLVMConstVector(aux, n),
342                                     LLVMConstVector(shuffles, n), "");
343    } else {
344       /*
345        * Bit mask and shifts.
346        *
347        * For example, this will convert BGRA to RGBA by doing
348        *
349        *   rgba = (bgra & 0x00ff0000) >> 16
350        *        | (bgra & 0xff00ff00)
351        *        | (bgra & 0x000000ff) << 16
352        *
353        * This is necessary not only for faster cause, but because X86 backend
354        * will refuse shuffles of <4 x i8> vectors
355        */
356       LLVMValueRef res;
357       struct lp_type type4;
358       unsigned cond = 0;
359       unsigned chan;
360       int shift;
361 
362       /*
363        * Start with a mixture of 1 and 0.
364        */
365       for (chan = 0; chan < 4; ++chan) {
366          if (swizzles[chan] == PIPE_SWIZZLE_ONE) {
367             cond |= 1 << chan;
368          }
369       }
370       res = lp_build_select_aos(bld, cond, bld->one, bld->zero);
371 
372       /*
373        * Build a type where each element is an integer that cover the four
374        * channels.
375        */
376       type4 = type;
377       type4.floating = FALSE;
378       type4.width *= 4;
379       type4.length /= 4;
380 
381       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
382       res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");
383 
384       /*
385        * Mask and shift the channels, trying to group as many channels in the
386        * same shift as possible
387        */
388       for (shift = -3; shift <= 3; ++shift) {
389          unsigned long long mask = 0;
390 
391          assert(type4.width <= sizeof(mask)*8);
392 
393          for (chan = 0; chan < 4; ++chan) {
394             /* FIXME: big endian */
395             if (swizzles[chan] < 4 &&
396                 chan - swizzles[chan] == shift) {
397                mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
398             }
399          }
400 
401          if (mask) {
402             LLVMValueRef masked;
403             LLVMValueRef shifted;
404 
405             if (0)
406                debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask);
407 
408             masked = LLVMBuildAnd(builder, a,
409                                   lp_build_const_int_vec(bld->gallivm, type4, mask), "");
410             if (shift > 0) {
411                shifted = LLVMBuildShl(builder, masked,
412                                       lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
413             } else if (shift < 0) {
414                shifted = LLVMBuildLShr(builder, masked,
415                                        lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
416             } else {
417                shifted = masked;
418             }
419 
420             res = LLVMBuildOr(builder, res, shifted, "");
421          }
422       }
423 
424       return LLVMBuildBitCast(builder, res,
425                               lp_build_vec_type(bld->gallivm, type), "");
426    }
427 }
428 
429 
430 /**
431  * Extended swizzle of a single channel of a SoA vector.
432  *
433  * @param bld         building context
434  * @param unswizzled  array with the 4 unswizzled values
435  * @param swizzle     one of the PIPE_SWIZZLE_*
436  *
437  * @return  the swizzled value.
438  */
439 LLVMValueRef
lp_build_swizzle_soa_channel(struct lp_build_context * bld,const LLVMValueRef * unswizzled,unsigned swizzle)440 lp_build_swizzle_soa_channel(struct lp_build_context *bld,
441                              const LLVMValueRef *unswizzled,
442                              unsigned swizzle)
443 {
444    switch (swizzle) {
445    case PIPE_SWIZZLE_RED:
446    case PIPE_SWIZZLE_GREEN:
447    case PIPE_SWIZZLE_BLUE:
448    case PIPE_SWIZZLE_ALPHA:
449       return unswizzled[swizzle];
450    case PIPE_SWIZZLE_ZERO:
451       return bld->zero;
452    case PIPE_SWIZZLE_ONE:
453       return bld->one;
454    default:
455       assert(0);
456       return bld->undef;
457    }
458 }
459 
460 
461 /**
462  * Extended swizzle of a SoA vector.
463  *
464  * @param bld         building context
465  * @param unswizzled  array with the 4 unswizzled values
466  * @param swizzles    array of PIPE_SWIZZLE_*
467  * @param swizzled    output swizzled values
468  */
469 void
lp_build_swizzle_soa(struct lp_build_context * bld,const LLVMValueRef * unswizzled,const unsigned char swizzles[4],LLVMValueRef * swizzled)470 lp_build_swizzle_soa(struct lp_build_context *bld,
471                      const LLVMValueRef *unswizzled,
472                      const unsigned char swizzles[4],
473                      LLVMValueRef *swizzled)
474 {
475    unsigned chan;
476 
477    for (chan = 0; chan < 4; ++chan) {
478       swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled,
479                                                     swizzles[chan]);
480    }
481 }
482 
483 
484 /**
485  * Do an extended swizzle of a SoA vector inplace.
486  *
487  * @param bld         building context
488  * @param values      intput/output array with the 4 values
489  * @param swizzles    array of PIPE_SWIZZLE_*
490  */
491 void
lp_build_swizzle_soa_inplace(struct lp_build_context * bld,LLVMValueRef * values,const unsigned char swizzles[4])492 lp_build_swizzle_soa_inplace(struct lp_build_context *bld,
493                              LLVMValueRef *values,
494                              const unsigned char swizzles[4])
495 {
496    LLVMValueRef unswizzled[4];
497    unsigned chan;
498 
499    for (chan = 0; chan < 4; ++chan) {
500       unswizzled[chan] = values[chan];
501    }
502 
503    lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
504 }
505 
506 
507 /**
508  * Transpose from AOS <-> SOA
509  *
510  * @param single_type_lp   type of pixels
511  * @param src              the 4 * n pixel input
512  * @param dst              the 4 * n pixel output
513  */
514 void
lp_build_transpose_aos(struct gallivm_state * gallivm,struct lp_type single_type_lp,const LLVMValueRef src[4],LLVMValueRef dst[4])515 lp_build_transpose_aos(struct gallivm_state *gallivm,
516                        struct lp_type single_type_lp,
517                        const LLVMValueRef src[4],
518                        LLVMValueRef dst[4])
519 {
520    struct lp_type double_type_lp = single_type_lp;
521    LLVMTypeRef single_type;
522    LLVMTypeRef double_type;
523    LLVMValueRef t0, t1, t2, t3;
524 
525    double_type_lp.length >>= 1;
526    double_type_lp.width  <<= 1;
527 
528    double_type = lp_build_vec_type(gallivm, double_type_lp);
529    single_type = lp_build_vec_type(gallivm, single_type_lp);
530 
531    /* Interleave x, y, z, w -> xy and zw */
532    t0 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 0);
533    t1 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 0);
534    t2 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 1);
535    t3 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 1);
536 
537    /* Cast to double width type for second interleave */
538    t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0");
539    t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1");
540    t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2");
541    t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3");
542 
543    /* Interleave xy, zw -> xyzw */
544    dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0);
545    dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1);
546    dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0);
547    dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1);
548 
549    /* Cast back to original single width type */
550    dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0");
551    dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1");
552    dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2");
553    dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3");
554 }
555 
556 
557 /**
558  * Pack first element of aos values,
559  * pad out to destination size.
560  * i.e. x1 _ _ _ x2 _ _ _ will become x1 x2 _ _
561  */
562 LLVMValueRef
lp_build_pack_aos_scalars(struct gallivm_state * gallivm,struct lp_type src_type,struct lp_type dst_type,const LLVMValueRef src)563 lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
564                           struct lp_type src_type,
565                           struct lp_type dst_type,
566                           const LLVMValueRef src)
567 {
568    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
569    LLVMValueRef undef = LLVMGetUndef(i32t);
570    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
571    unsigned num_src = src_type.length / 4;
572    unsigned num_dst = dst_type.length;
573    unsigned i;
574 
575    assert(num_src <= num_dst);
576 
577    for (i = 0; i < num_src; i++) {
578       shuffles[i] = LLVMConstInt(i32t, i * 4, 0);
579    }
580    for (i = num_src; i < num_dst; i++) {
581       shuffles[i] = undef;
582    }
583 
584    if (num_dst == 1) {
585       return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
586    }
587    else {
588       return LLVMBuildShuffleVector(gallivm->builder, src, src,
589                                     LLVMConstVector(shuffles, num_dst), "");
590    }
591 }
592 
593 
594 /**
595  * Unpack and broadcast packed aos values consisting of only the
596  * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
597  */
598 LLVMValueRef
lp_build_unpack_broadcast_aos_scalars(struct gallivm_state * gallivm,struct lp_type src_type,struct lp_type dst_type,const LLVMValueRef src)599 lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
600                                       struct lp_type src_type,
601                                       struct lp_type dst_type,
602                                       const LLVMValueRef src)
603 {
604    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
605    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
606    unsigned num_dst = dst_type.length;
607    unsigned num_src = dst_type.length / 4;
608    unsigned i;
609 
610    assert(num_dst / 4 <= src_type.length);
611 
612    for (i = 0; i < num_src; i++) {
613       shuffles[i*4] = LLVMConstInt(i32t, i, 0);
614       shuffles[i*4+1] = LLVMConstInt(i32t, i, 0);
615       shuffles[i*4+2] = LLVMConstInt(i32t, i, 0);
616       shuffles[i*4+3] = LLVMConstInt(i32t, i, 0);
617    }
618 
619    if (num_src == 1) {
620       return lp_build_extract_broadcast(gallivm, src_type, dst_type,
621                                         src, shuffles[0]);
622    }
623    else {
624       return LLVMBuildShuffleVector(gallivm->builder, src, src,
625                                     LLVMConstVector(shuffles, num_dst), "");
626    }
627 }
628 
629