/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Helper functions for logical operations.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */


#include "util/u_cpu_detect.h"
#include "util/u_memory.h"
#include "util/u_debug.h"

#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_init.h"
#include "lp_bld_intr.h"
#include "lp_bld_debug.h"
#include "lp_bld_logic.h"


/*
 * XXX
 *
 * Selection with a vector conditional like
 *
 *    select <4 x i1> %C, %A, %B
 *
 * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only
 * supported on some backends (x86) starting with llvm 3.1.
 *
 * Expanding the boolean vector to full SIMD register width, as in
 *
 *    sext <4 x i1> %C to <4 x i32>
 *
 * is valid and supported (e.g. llvm/test/CodeGen/X86/vec_compare.ll), but
 * it causes assertion failures in LLVM 2.6. It appears to work correctly on
 * LLVM 2.7.
 */


/**
 * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 * \param func one of PIPE_FUNC_x
 * If the ordered argument is true the function will use LLVM's ordered
 * comparisons, otherwise unordered comparisons will be used.
 * The result values will be 0 for false or ~0 for true.
 */
static LLVMValueRef
lp_build_compare_ext(struct gallivm_state *gallivm,
                     const struct lp_type type,
                     unsigned func,
                     LLVMValueRef a,
                     LLVMValueRef b,
                     boolean ordered)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
   LLVMValueRef cond;
   LLVMValueRef res;

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(func == PIPE_FUNC_NEVER)
      return zeros;
   if(func == PIPE_FUNC_ALWAYS)
      return ones;

   assert(func > PIPE_FUNC_NEVER);
   assert(func < PIPE_FUNC_ALWAYS);

   if(type.floating) {
      LLVMRealPredicate op;
      switch(func) {
      case PIPE_FUNC_EQUAL:
         op = ordered ? LLVMRealOEQ : LLVMRealUEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = ordered ? LLVMRealONE : LLVMRealUNE;
         break;
      case PIPE_FUNC_LESS:
         op = ordered ? LLVMRealOLT : LLVMRealULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = ordered ? LLVMRealOLE : LLVMRealULE;
         break;
      case PIPE_FUNC_GREATER:
         op = ordered ? LLVMRealOGT : LLVMRealUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = ordered ? LLVMRealOGE : LLVMRealUGE;
         break;
      default:
         assert(0);
         return lp_build_undef(gallivm, type);
      }

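      /* The comparison yields an <n x i1>; sign-extend it so that true
       * lanes become ~0 and false lanes become 0, as documented above.
       */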
      cond = LLVMBuildFCmp(builder, op, a, b, "");
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
   }
   else {
      LLVMIntPredicate op;
      switch(func) {
      case PIPE_FUNC_EQUAL:
         op = LLVMIntEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = LLVMIntNE;
         break;
      case PIPE_FUNC_LESS:
         op = type.sign ? LLVMIntSLT : LLVMIntULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = type.sign ? LLVMIntSLE : LLVMIntULE;
         break;
      case PIPE_FUNC_GREATER:
         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
         break;
      default:
         assert(0);
         return lp_build_undef(gallivm, type);
      }

      cond = LLVMBuildICmp(builder, op, a, b, "");
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
   }

   return res;
}

/**
 * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 * \param func one of PIPE_FUNC_x
 * The result values will be 0 for false or ~0 for true.
 */
LLVMValueRef
lp_build_compare(struct gallivm_state *gallivm,
                 const struct lp_type type,
                 unsigned func,
                 LLVMValueRef a,
                 LLVMValueRef b)
{
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(func == PIPE_FUNC_NEVER)
      return zeros;
   if(func == PIPE_FUNC_ALWAYS)
      return ones;

   assert(func > PIPE_FUNC_NEVER);
   assert(func < PIPE_FUNC_ALWAYS);

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * There are no unsigned integer comparison instructions in SSE.
    */

   if (!type.floating && !type.sign &&
       type.width * type.length == 128 &&
       util_cpu_caps.has_sse2 &&
       (func == PIPE_FUNC_LESS ||
        func == PIPE_FUNC_LEQUAL ||
        func == PIPE_FUNC_GREATER ||
        func == PIPE_FUNC_GEQUAL) &&
       (gallivm_debug & GALLIVM_DEBUG_PERF)) {
      debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
                   __FUNCTION__, type.length, type.width);
   }
#endif

   return lp_build_compare_ext(gallivm, type, func, a, b, FALSE);
}
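
/*
 * Illustrative use (a sketch, not code from this file; the lp_type shown
 * is a hypothetical configuration):
 *
 *    struct lp_type f32x4 = lp_type_float_vec(32, 128);
 *    LLVMValueRef lt = lp_build_compare(gallivm, f32x4,
 *                                       PIPE_FUNC_LESS, a, b);
 *
 * lt is then a <4 x i32> holding ~0 in each lane where a[i] < b[i] and 0
 * elsewhere, ready to be used with lp_build_select().
 */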

/**
 * Build code to compare two values 'a' and 'b' using the given func.
 * \param func one of PIPE_FUNC_x
 * If the operands are floating point numbers, the function will use
 * ordered comparisons, which return true only if neither operand is a NaN
 * and the specified condition evaluates to true.
 * The result values will be 0 for false or ~0 for true.
 */
LLVMValueRef
lp_build_cmp_ordered(struct lp_build_context *bld,
                     unsigned func,
                     LLVMValueRef a,
                     LLVMValueRef b)
{
   return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE);
}

/**
 * Build code to compare two values 'a' and 'b' using the given func.
 * \param func one of PIPE_FUNC_x
 * If the operands are floating point numbers, the function will use
 * unordered comparisons, which return true if either operand is a NaN
 * or the specified condition evaluates to true.
 * The result values will be 0 for false or ~0 for true.
 */
LLVMValueRef
lp_build_cmp(struct lp_build_context *bld,
             unsigned func,
             LLVMValueRef a,
             LLVMValueRef b)
{
   return lp_build_compare(bld->gallivm, bld->type, func, a, b);
}
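
/*
 * Example of the ordered/unordered distinction: if a lane of 'a' is a NaN,
 * lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, b) yields ~0 for that lane
 * (unordered comparisons succeed on NaN), whereas
 * lp_build_cmp_ordered(bld, PIPE_FUNC_EQUAL, a, b) yields 0.
 */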


/**
 * Return (mask & a) | (~mask & b);
 */
LLVMValueRef
lp_build_select_bitwise(struct lp_build_context *bld,
                        LLVMValueRef mask,
                        LLVMValueRef a,
                        LLVMValueRef b)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_type type = bld->type;
   LLVMValueRef res;

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if (a == b) {
      return a;
   }

   if(type.floating) {
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
      a = LLVMBuildBitCast(builder, a, int_vec_type, "");
      b = LLVMBuildBitCast(builder, b, int_vec_type, "");
   }

   a = LLVMBuildAnd(builder, a, mask, "");

   /* This often gets translated to PANDN, but sometimes the NOT is
    * pre-computed and stored in another constant. The best strategy depends
    * on available registers, so it is not a big deal -- hopefully LLVM
    * makes the right decision based on the rest of the program.
    */
   b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), "");

   res = LLVMBuildOr(builder, a, b, "");

   if(type.floating) {
      LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
      res = LLVMBuildBitCast(builder, res, vec_type, "");
   }

   return res;
}
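
/*
 * Since each mask element is assumed to be either 0 or ~0, the expression
 * (mask & a) | (~mask & b) picks a or b whole per element; e.g. a mask of
 * {~0, 0} yields {a0, b1}.
 */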


/**
 * Return mask ? a : b;
 *
 * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other
 * value will yield unpredictable results.
 */
LLVMValueRef
lp_build_select(struct lp_build_context *bld,
                LLVMValueRef mask,
                LLVMValueRef a,
                LLVMValueRef b)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMContextRef lc = bld->gallivm->context;
   struct lp_type type = bld->type;
   LLVMValueRef res;

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(a == b)
      return a;

   if (type.length == 1) {
      mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
      res = LLVMBuildSelect(builder, mask, a, b, "");
   }
   else if (HAVE_LLVM != 0x0307 &&
            (LLVMIsConstant(mask) ||
             LLVMGetInstructionOpcode(mask) == LLVMSExt)) {
      /* Generate a vector select.
       *
       * Using vector selects should avoid emitting intrinsics and hence
       * avoid hindering optimization passes, but vector selects were not
       * properly supported for a long time, and LLVM will generate poor
       * code when the mask is not the result of a comparison.
       * Also, llvm 3.7 may miscompile them (bug 94972).
       * XXX: Even if the instruction is an SExt, this may still produce
       * terrible code. Try piglit stencil-twoside.
       */

      /* Convert the mask to a vector of booleans.
       *
       * XXX: On x86 the mask is controlled by the MSB, so if we shifted the
       * mask right by `type.width - 1`, LLVM should realize the mask is
       * ready. Alas, what really happens is that LLVM emits two shifts
       * back to back.
       */
      if (0) {
         LLVMValueRef shift = LLVMConstInt(bld->int_elem_type,
                                           bld->type.width - 1, 0);
         shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift);
         mask = LLVMBuildLShr(builder, mask, shift, "");
      }
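      /* The truncation keeps only the low bit of each lane, which equals
       * the lane's truth value because the mask lanes are 0 or ~0.
       */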
      LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc),
                                                 type.length);
      mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");

      res = LLVMBuildSelect(builder, mask, a, b, "");
   }
   else if (((util_cpu_caps.has_sse4_1 &&
              type.width * type.length == 128) ||
             (util_cpu_caps.has_avx &&
              type.width * type.length == 256 && type.width >= 32) ||
             (util_cpu_caps.has_avx2 &&
              type.width * type.length == 256)) &&
            !LLVMIsConstant(a) &&
            !LLVMIsConstant(b) &&
            !LLVMIsConstant(mask)) {
      const char *intrinsic;
      LLVMTypeRef arg_type;
      LLVMValueRef args[3];

      /*
       * AVX only has float blends, but we can simply cast i32/i64 vectors
       * to float.
       */
      if (type.width * type.length == 256) {
         if (type.width == 64) {
            intrinsic = "llvm.x86.avx.blendv.pd.256";
            arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
         }
         else if (type.width == 32) {
            intrinsic = "llvm.x86.avx.blendv.ps.256";
            arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
         } else {
            assert(util_cpu_caps.has_avx2);
            intrinsic = "llvm.x86.avx2.pblendvb";
            arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32);
         }
      }
      else if (type.floating &&
               type.width == 64) {
         intrinsic = "llvm.x86.sse41.blendvpd";
         arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
      } else if (type.floating &&
                 type.width == 32) {
         intrinsic = "llvm.x86.sse41.blendvps";
         arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
      } else {
         intrinsic = "llvm.x86.sse41.pblendvb";
         arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
      }

      if (arg_type != bld->int_vec_type) {
         mask = LLVMBuildBitCast(builder, mask, arg_type, "");
      }

      if (arg_type != bld->vec_type) {
         a = LLVMBuildBitCast(builder, a, arg_type, "");
         b = LLVMBuildBitCast(builder, b, arg_type, "");
      }

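      /* blendv selects the second source operand (here a) for lanes whose
       * mask MSB is set and the first source (b) otherwise, which matches
       * mask ? a : b.
       */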
      args[0] = b;
      args[1] = a;
      args[2] = mask;

      res = lp_build_intrinsic(builder, intrinsic,
                               arg_type, args, ARRAY_SIZE(args), 0);

      if (arg_type != bld->vec_type) {
         res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
      }
   }
   else {
      res = lp_build_select_bitwise(bld, mask, a, b);
   }

   return res;
}


/**
 * Return mask ? a : b;
 *
 * mask is a TGSI_WRITEMASK_xxx.
 */
LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
                    unsigned mask,
                    LLVMValueRef a,
                    LLVMValueRef b,
                    unsigned num_channels)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;
   unsigned i, j;

   assert((mask & ~0xf) == 0);
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(a == b)
      return a;
   if((mask & 0xf) == 0xf)
      return a;
   if((mask & 0xf) == 0x0)
      return b;
   if(a == bld->undef || b == bld->undef)
      return bld->undef;

   /*
    * There are two major ways of accomplishing this:
    * - with a shuffle
    * - with a select
    *
    * The flip between these is empirical and might need to be adjusted.
    */
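
   /*
    * Example: with n == 4, num_channels == 4 and mask == 0x5 (x and z set),
    * the shuffle below uses indices { 0, n+1, 2, n+3 }, selecting
    * { a.x, b.y, a.z, b.w }.
    */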
   if (n <= 4) {
      /*
       * Shuffle.
       */
      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

      for(j = 0; j < n; j += num_channels)
         for(i = 0; i < num_channels; ++i)
            shuffles[j + i] = LLVMConstInt(elem_type,
                                           (mask & (1 << i) ? 0 : n) + j + i,
                                           0);

      return LLVMBuildShuffleVector(builder, a, b,
                                    LLVMConstVector(shuffles, n), "");
   }
   else {
      LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type,
                                                      mask, num_channels);
      return lp_build_select(bld, mask_vec, a, b);
   }
}


/**
 * Return (scalar-cast)val ? true : false;
 */
LLVMValueRef
lp_build_any_true_range(struct lp_build_context *bld,
                        unsigned real_length,
                        LLVMValueRef val)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMTypeRef scalar_type;
   LLVMTypeRef true_type;

   assert(real_length <= bld->type.length);

   true_type = LLVMIntTypeInContext(bld->gallivm->context,
                                    bld->type.width * real_length);
   scalar_type = LLVMIntTypeInContext(bld->gallivm->context,
                                      bld->type.width * bld->type.length);
   val = LLVMBuildBitCast(builder, val, scalar_type, "");
   /*
    * We are always using native types here, so we can use intrinsics.
    * However, if we do not do per-element calculations, we must ensure
    * the excess elements are not used, since they may contain garbage.
    */
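   /*
    * E.g. a <4 x i32> mask with real_length == 3 is viewed as an i128,
    * truncated to i96 to drop the garbage lane, and compared against zero,
    * yielding a single i1 that is true iff any of the three lanes is ~0.
    */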
   if (real_length < bld->type.length) {
      val = LLVMBuildTrunc(builder, val, true_type, "");
   }
   return LLVMBuildICmp(builder, LLVMIntNE,
                        val, LLVMConstNull(true_type), "");
}