• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * @file
30  * Helper functions for logical operations.
31  *
32  * @author Jose Fonseca <jfonseca@vmware.com>
33  */
34 
35 #include <llvm/Config/llvm-config.h>
36 
37 #include "util/u_cpu_detect.h"
38 #include "util/u_memory.h"
39 #include "util/u_debug.h"
40 
41 #include "lp_bld_type.h"
42 #include "lp_bld_const.h"
43 #include "lp_bld_swizzle.h"
44 #include "lp_bld_init.h"
45 #include "lp_bld_intr.h"
46 #include "lp_bld_debug.h"
47 #include "lp_bld_logic.h"
48 
49 
50 /*
51  * XXX
52  *
53  * Selection with vector conditional like
54  *
55  *    select <4 x i1> %C, %A, %B
56  *
57  * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only
58  * supported on some backends (x86) starting with llvm 3.1.
59  *
60  * Expanding the boolean vector to full SIMD register width, as in
61  *
62  *    sext <4 x i1> %C to <4 x i32>
63  *
64  * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
65  * it causes assertion failures in LLVM 2.6. It appears to work correctly on
66  * LLVM 2.7.
67  */
68 
69 
70 /**
71  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
72  * \param func  one of PIPE_FUNC_x
73  * If the ordered argument is true the function will use LLVM's ordered
74  * comparisons, otherwise unordered comparisons will be used.
75  * The result values will be 0 for false or ~0 for true.
76  */
77 static LLVMValueRef
lp_build_compare_ext(struct gallivm_state * gallivm,const struct lp_type type,enum pipe_compare_func func,LLVMValueRef a,LLVMValueRef b,boolean ordered)78 lp_build_compare_ext(struct gallivm_state *gallivm,
79                      const struct lp_type type,
80                      enum pipe_compare_func func,
81                      LLVMValueRef a,
82                      LLVMValueRef b,
83                      boolean ordered)
84 {
85    LLVMBuilderRef builder = gallivm->builder;
86    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
87    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
88    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
89    LLVMValueRef cond;
90    LLVMValueRef res;
91 
92    assert(lp_check_value(type, a));
93    assert(lp_check_value(type, b));
94 
95    if (func == PIPE_FUNC_NEVER)
96       return zeros;
97    if (func == PIPE_FUNC_ALWAYS)
98       return ones;
99 
100    assert(func > PIPE_FUNC_NEVER);
101    assert(func < PIPE_FUNC_ALWAYS);
102 
103    if (type.floating) {
104       LLVMRealPredicate op;
105       switch(func) {
106       case PIPE_FUNC_EQUAL:
107          op = ordered ? LLVMRealOEQ : LLVMRealUEQ;
108          break;
109       case PIPE_FUNC_NOTEQUAL:
110          op = ordered ? LLVMRealONE : LLVMRealUNE;
111          break;
112       case PIPE_FUNC_LESS:
113          op = ordered ? LLVMRealOLT : LLVMRealULT;
114          break;
115       case PIPE_FUNC_LEQUAL:
116          op = ordered ? LLVMRealOLE : LLVMRealULE;
117          break;
118       case PIPE_FUNC_GREATER:
119          op = ordered ? LLVMRealOGT : LLVMRealUGT;
120          break;
121       case PIPE_FUNC_GEQUAL:
122          op = ordered ? LLVMRealOGE : LLVMRealUGE;
123          break;
124       default:
125          assert(0);
126          return lp_build_undef(gallivm, type);
127       }
128 
129       cond = LLVMBuildFCmp(builder, op, a, b, "");
130       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
131    }
132    else {
133       LLVMIntPredicate op;
134       switch(func) {
135       case PIPE_FUNC_EQUAL:
136          op = LLVMIntEQ;
137          break;
138       case PIPE_FUNC_NOTEQUAL:
139          op = LLVMIntNE;
140          break;
141       case PIPE_FUNC_LESS:
142          op = type.sign ? LLVMIntSLT : LLVMIntULT;
143          break;
144       case PIPE_FUNC_LEQUAL:
145          op = type.sign ? LLVMIntSLE : LLVMIntULE;
146          break;
147       case PIPE_FUNC_GREATER:
148          op = type.sign ? LLVMIntSGT : LLVMIntUGT;
149          break;
150       case PIPE_FUNC_GEQUAL:
151          op = type.sign ? LLVMIntSGE : LLVMIntUGE;
152          break;
153       default:
154          assert(0);
155          return lp_build_undef(gallivm, type);
156       }
157 
158       cond = LLVMBuildICmp(builder, op, a, b, "");
159       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
160    }
161 
162    return res;
163 }
164 
165 /**
166  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
167  * \param func  one of PIPE_FUNC_x
168  * The result values will be 0 for false or ~0 for true.
169  */
170 LLVMValueRef
lp_build_compare(struct gallivm_state * gallivm,const struct lp_type type,enum pipe_compare_func func,LLVMValueRef a,LLVMValueRef b)171 lp_build_compare(struct gallivm_state *gallivm,
172                  const struct lp_type type,
173                  enum pipe_compare_func func,
174                  LLVMValueRef a,
175                  LLVMValueRef b)
176 {
177    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
178    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
179    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
180 
181    assert(lp_check_value(type, a));
182    assert(lp_check_value(type, b));
183 
184    if (func == PIPE_FUNC_NEVER)
185       return zeros;
186    if (func == PIPE_FUNC_ALWAYS)
187       return ones;
188 
189    assert(func > PIPE_FUNC_NEVER);
190    assert(func < PIPE_FUNC_ALWAYS);
191 
192 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
193    /*
194     * There are no unsigned integer comparison instructions in SSE.
195     */
196 
197    if (!type.floating && !type.sign &&
198        type.width * type.length == 128 &&
199        util_get_cpu_caps()->has_sse2 &&
200        (func == PIPE_FUNC_LESS ||
201         func == PIPE_FUNC_LEQUAL ||
202         func == PIPE_FUNC_GREATER ||
203         func == PIPE_FUNC_GEQUAL) &&
204        (gallivm_debug & GALLIVM_DEBUG_PERF)) {
205          debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
206                       __FUNCTION__, type.length, type.width);
207    }
208 #endif
209 
210    return lp_build_compare_ext(gallivm, type, func, a, b, FALSE);
211 }
212 
213 /**
214  * Build code to compare two values 'a' and 'b' using the given func.
215  * \param func  one of PIPE_FUNC_x
216  * If the operands are floating point numbers, the function will use
217  * ordered comparison which means that it will return true if both
218  * operands are not a NaN and the specified condition evaluates to true.
219  * The result values will be 0 for false or ~0 for true.
220  */
221 LLVMValueRef
lp_build_cmp_ordered(struct lp_build_context * bld,enum pipe_compare_func func,LLVMValueRef a,LLVMValueRef b)222 lp_build_cmp_ordered(struct lp_build_context *bld,
223                      enum pipe_compare_func func,
224                      LLVMValueRef a,
225                      LLVMValueRef b)
226 {
227    return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE);
228 }
229 
230 /**
231  * Build code to compare two values 'a' and 'b' using the given func.
232  * \param func  one of PIPE_FUNC_x
233  * If the operands are floating point numbers, the function will use
234  * unordered comparison which means that it will return true if either
235  * operand is a NaN or the specified condition evaluates to true.
236  * The result values will be 0 for false or ~0 for true.
237  */
238 LLVMValueRef
lp_build_cmp(struct lp_build_context * bld,enum pipe_compare_func func,LLVMValueRef a,LLVMValueRef b)239 lp_build_cmp(struct lp_build_context *bld,
240              enum pipe_compare_func func,
241              LLVMValueRef a,
242              LLVMValueRef b)
243 {
244    return lp_build_compare(bld->gallivm, bld->type, func, a, b);
245 }
246 
247 
248 /**
249  * Return (mask & a) | (~mask & b);
250  */
251 LLVMValueRef
lp_build_select_bitwise(struct lp_build_context * bld,LLVMValueRef mask,LLVMValueRef a,LLVMValueRef b)252 lp_build_select_bitwise(struct lp_build_context *bld,
253                         LLVMValueRef mask,
254                         LLVMValueRef a,
255                         LLVMValueRef b)
256 {
257    LLVMBuilderRef builder = bld->gallivm->builder;
258    struct lp_type type = bld->type;
259    LLVMValueRef res;
260    LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
261 
262    assert(lp_check_value(type, a));
263    assert(lp_check_value(type, b));
264 
265    if (a == b) {
266       return a;
267    }
268 
269    if (type.floating) {
270       a = LLVMBuildBitCast(builder, a, int_vec_type, "");
271       b = LLVMBuildBitCast(builder, b, int_vec_type, "");
272    }
273 
274    if (type.width > 32)
275       mask = LLVMBuildSExt(builder, mask, int_vec_type, "");
276    a = LLVMBuildAnd(builder, a, mask, "");
277 
278    /* This often gets translated to PANDN, but sometimes the NOT is
279     * pre-computed and stored in another constant. The best strategy depends
280     * on available registers, so it is not a big deal -- hopefully LLVM does
281     * the right decision attending the rest of the program.
282     */
283    b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), "");
284 
285    res = LLVMBuildOr(builder, a, b, "");
286 
287    if (type.floating) {
288       LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
289       res = LLVMBuildBitCast(builder, res, vec_type, "");
290    }
291 
292    return res;
293 }
294 
295 
296 /**
297  * Return mask ? a : b;
298  *
299  * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
300  * will yield unpredictable results.
301  */
302 LLVMValueRef
lp_build_select(struct lp_build_context * bld,LLVMValueRef mask,LLVMValueRef a,LLVMValueRef b)303 lp_build_select(struct lp_build_context *bld,
304                 LLVMValueRef mask,
305                 LLVMValueRef a,
306                 LLVMValueRef b)
307 {
308    LLVMBuilderRef builder = bld->gallivm->builder;
309    LLVMContextRef lc = bld->gallivm->context;
310    struct lp_type type = bld->type;
311    LLVMValueRef res;
312 
313    assert(lp_check_value(type, a));
314    assert(lp_check_value(type, b));
315 
316    if (a == b)
317       return a;
318 
319    if (type.length == 1) {
320       mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
321       res = LLVMBuildSelect(builder, mask, a, b, "");
322    }
323    else if (LLVMIsConstant(mask) ||
324             LLVMGetInstructionOpcode(mask) == LLVMSExt) {
325       /* Generate a vector select.
326        *
327        * Using vector selects should avoid emitting intrinsics hence avoid
328        * hindering optimization passes, but vector selects weren't properly
329        * supported yet for a long time, and LLVM will generate poor code when
330        * the mask is not the result of a comparison.
331        * XXX: Even if the instruction was an SExt, this may still produce
332        * terrible code. Try piglit stencil-twoside.
333        */
334 
335       /* Convert the mask to a vector of booleans.
336        *
337        * XXX: In x86 the mask is controlled by the MSB, so if we shifted the
338        * mask by `type.width - 1`, LLVM should realize the mask is ready.  Alas
339        * what really happens is that LLVM will emit two shifts back to back.
340        */
341       if (0) {
342          LLVMValueRef shift =
343             LLVMConstInt(bld->int_elem_type, bld->type.width - 1, 0);
344          shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift);
345          mask = LLVMBuildLShr(builder, mask, shift, "");
346       }
347       LLVMTypeRef bool_vec_type =
348          LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
349       mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
350 
351       res = LLVMBuildSelect(builder, mask, a, b, "");
352    }
353    else if (((util_get_cpu_caps()->has_sse4_1 &&
354               type.width * type.length == 128) ||
355              (util_get_cpu_caps()->has_avx &&
356               type.width * type.length == 256 && type.width >= 32) ||
357              (util_get_cpu_caps()->has_avx2 &&
358               type.width * type.length == 256)) &&
359             !LLVMIsConstant(a) &&
360             !LLVMIsConstant(b) &&
361             !LLVMIsConstant(mask)) {
362       const char *intrinsic;
363       LLVMTypeRef arg_type;
364       LLVMValueRef args[3];
365 
366       LLVMTypeRef mask_type = LLVMGetElementType(LLVMTypeOf(mask));
367       if (LLVMGetIntTypeWidth(mask_type) != type.width) {
368          LLVMTypeRef int_vec_type =
369             LLVMVectorType(LLVMIntTypeInContext(lc, type.width), type.length);
370          mask = LLVMBuildSExt(builder, mask, int_vec_type, "");
371       }
372       /*
373        *  There's only float blend in AVX but can just cast i32/i64
374        *  to float.
375        */
376       if (type.width * type.length == 256) {
377          if (type.width == 64) {
378            intrinsic = "llvm.x86.avx.blendv.pd.256";
379            arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
380          }
381          else if (type.width == 32) {
382             intrinsic = "llvm.x86.avx.blendv.ps.256";
383             arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
384          } else {
385             assert(util_get_cpu_caps()->has_avx2);
386             intrinsic = "llvm.x86.avx2.pblendvb";
387             arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32);
388          }
389       }
390       else if (type.floating &&
391                type.width == 64) {
392          intrinsic = "llvm.x86.sse41.blendvpd";
393          arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
394       } else if (type.floating &&
395                  type.width == 32) {
396          intrinsic = "llvm.x86.sse41.blendvps";
397          arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
398       } else {
399          intrinsic = "llvm.x86.sse41.pblendvb";
400          arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
401       }
402 
403       if (arg_type != bld->int_vec_type) {
404          mask = LLVMBuildBitCast(builder, mask, arg_type, "");
405       }
406 
407       if (arg_type != bld->vec_type) {
408          a = LLVMBuildBitCast(builder, a, arg_type, "");
409          b = LLVMBuildBitCast(builder, b, arg_type, "");
410       }
411 
412       args[0] = b;
413       args[1] = a;
414       args[2] = mask;
415 
416       res = lp_build_intrinsic(builder, intrinsic,
417                                arg_type, args, ARRAY_SIZE(args), 0);
418 
419       if (arg_type != bld->vec_type) {
420          res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
421       }
422    }
423    else {
424       res = lp_build_select_bitwise(bld, mask, a, b);
425    }
426 
427    return res;
428 }
429 
430 
431 /**
432  * Return mask ? a : b;
433  *
434  * mask is a TGSI_WRITEMASK_xxx.
435  */
436 LLVMValueRef
lp_build_select_aos(struct lp_build_context * bld,unsigned mask,LLVMValueRef a,LLVMValueRef b,unsigned num_channels)437 lp_build_select_aos(struct lp_build_context *bld,
438                     unsigned mask,
439                     LLVMValueRef a,
440                     LLVMValueRef b,
441                     unsigned num_channels)
442 {
443    LLVMBuilderRef builder = bld->gallivm->builder;
444    const struct lp_type type = bld->type;
445    const unsigned n = type.length;
446 
447    assert((mask & ~0xf) == 0);
448    assert(lp_check_value(type, a));
449    assert(lp_check_value(type, b));
450 
451    if (a == b)
452       return a;
453    if ((mask & 0xf) == 0xf)
454       return a;
455    if ((mask & 0xf) == 0x0)
456       return b;
457    if (a == bld->undef || b == bld->undef)
458       return bld->undef;
459 
460    /*
461     * There are two major ways of accomplishing this:
462     * - with a shuffle
463     * - with a select
464     *
465     * The flip between these is empirical and might need to be adjusted.
466     */
467    if (n <= 4) {
468       /*
469        * Shuffle.
470        */
471       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
472       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
473 
474       for (unsigned j = 0; j < n; j += num_channels)
475          for (unsigned i = 0; i < num_channels; ++i)
476             shuffles[j + i] = LLVMConstInt(elem_type,
477                                            (mask & (1 << i) ? 0 : n) + j + i,
478                                            0);
479 
480       return LLVMBuildShuffleVector(builder, a, b,
481                                     LLVMConstVector(shuffles, n), "");
482    }
483    else {
484       LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm,
485                                                       type, mask, num_channels);
486       return lp_build_select(bld, mask_vec, a, b);
487    }
488 }
489 
490 
491 /**
492  * Return (scalar-cast)val ? true : false;
493  */
494 LLVMValueRef
lp_build_any_true_range(struct lp_build_context * bld,unsigned real_length,LLVMValueRef val)495 lp_build_any_true_range(struct lp_build_context *bld,
496                         unsigned real_length,
497                         LLVMValueRef val)
498 {
499    LLVMBuilderRef builder = bld->gallivm->builder;
500    LLVMTypeRef scalar_type;
501    LLVMTypeRef true_type;
502 
503    assert(real_length <= bld->type.length);
504 
505    true_type = LLVMIntTypeInContext(bld->gallivm->context,
506                                     bld->type.width * real_length);
507    scalar_type = LLVMIntTypeInContext(bld->gallivm->context,
508                                       bld->type.width * bld->type.length);
509    val = LLVMBuildBitCast(builder, val, scalar_type, "");
510    /*
511     * We're using always native types so we can use intrinsics.
512     * However, if we don't do per-element calculations, we must ensure
513     * the excess elements aren't used since they may contain garbage.
514     */
515    if (real_length < bld->type.length) {
516       val = LLVMBuildTrunc(builder, val, true_type, "");
517    }
518    return LLVMBuildICmp(builder, LLVMIntNE,
519                         val, LLVMConstNull(true_type), "");
520 }
521