1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file opt_algebraic.cpp
26 *
 * Takes advantage of associativity, commutativity, and other algebraic
28 * properties to simplify expressions.
29 */
30
31 #include "ir.h"
32 #include "ir_visitor.h"
33 #include "ir_rvalue_visitor.h"
34 #include "ir_optimization.h"
35 #include "ir_builder.h"
36 #include "compiler/glsl_types.h"
37
38 using namespace ir_builder;
39
40 namespace {
41
42 /**
43 * Visitor class for replacing expressions with ir_constant values.
44 */
45
class ir_algebraic_visitor : public ir_rvalue_visitor {
public:
   ir_algebraic_visitor(bool native_integers,
                        const struct gl_shader_compiler_options *options)
      : options(options)
   {
      this->progress = false;
      /* Lazily initialized in handle_expression() from the first visited
       * expression's ralloc context.
       */
      this->mem_ctx = NULL;
      this->native_integers = native_integers;
   }

   virtual ~ir_algebraic_visitor()
   {
   }

   /* Skips subtrees rooted at invariant/precise assignments (see impl). */
   virtual ir_visitor_status visit_enter(ir_assignment *ir);

   /* Returns a simplified replacement rvalue, or \c ir when no rule fires. */
   ir_rvalue *handle_expression(ir_expression *ir);
   void handle_rvalue(ir_rvalue **rvalue);
   /* Pushes a constant operand of ir1 down an add/mul tree so constant
    * folding can combine it with another constant.
    */
   bool reassociate_constant(ir_expression *ir1,
                             int const_index,
                             ir_constant *constant,
                             ir_expression *ir2);
   void reassociate_operands(ir_expression *ir1,
                             int op1,
                             ir_expression *ir2,
                             int op2);
   ir_rvalue *swizzle_if_required(ir_expression *expr,
                                  ir_rvalue *operand);

   /* Driver options consulted by some rules (EmitNoPow, EmitNoSat,
    * OptimizeForAOS).
    */
   const struct gl_shader_compiler_options *options;
   /* ralloc context for newly built IR nodes. */
   void *mem_ctx;

   /* NOTE(review): stored but not referenced anywhere in this file —
    * presumably consulted by callers or a future rule; confirm before
    * removing.
    */
   bool native_integers;
   /* Set whenever any simplification was applied. */
   bool progress;
};
82
83 } /* unnamed namespace */
84
85 ir_visitor_status
visit_enter(ir_assignment * ir)86 ir_algebraic_visitor::visit_enter(ir_assignment *ir)
87 {
88 ir_variable *var = ir->lhs->variable_referenced();
89 if (var->data.invariant || var->data.precise) {
90 /* If we're assigning to an invariant or precise variable, just bail.
91 * Most of the algebraic optimizations aren't precision-safe.
92 *
93 * FINISHME: Find out which optimizations are precision-safe and enable
94 * then only for invariant or precise trees.
95 */
96 return visit_continue_with_parent;
97 } else {
98 return visit_continue;
99 }
100 }
101
102 static inline bool
is_vec_zero(ir_constant * ir)103 is_vec_zero(ir_constant *ir)
104 {
105 return (ir == NULL) ? false : ir->is_zero();
106 }
107
108 static inline bool
is_vec_one(ir_constant * ir)109 is_vec_one(ir_constant *ir)
110 {
111 return (ir == NULL) ? false : ir->is_one();
112 }
113
114 static inline bool
is_vec_two(ir_constant * ir)115 is_vec_two(ir_constant *ir)
116 {
117 return (ir == NULL) ? false : ir->is_value(2.0, 2);
118 }
119
120 static inline bool
is_vec_four(ir_constant * ir)121 is_vec_four(ir_constant *ir)
122 {
123 return (ir == NULL) ? false : ir->is_value(4.0, 4);
124 }
125
126 static inline bool
is_vec_negative_one(ir_constant * ir)127 is_vec_negative_one(ir_constant *ir)
128 {
129 return (ir == NULL) ? false : ir->is_negative_one();
130 }
131
132 static inline bool
is_valid_vec_const(ir_constant * ir)133 is_valid_vec_const(ir_constant *ir)
134 {
135 if (ir == NULL)
136 return false;
137
138 if (!ir->type->is_scalar() && !ir->type->is_vector())
139 return false;
140
141 return true;
142 }
143
144 static inline bool
is_less_than_one(ir_constant * ir)145 is_less_than_one(ir_constant *ir)
146 {
147 assert(ir->type->is_float());
148
149 if (!is_valid_vec_const(ir))
150 return false;
151
152 unsigned component = 0;
153 for (int c = 0; c < ir->type->vector_elements; c++) {
154 if (ir->get_float_component(c) < 1.0f)
155 component++;
156 }
157
158 return (component == ir->type->vector_elements);
159 }
160
161 static inline bool
is_greater_than_zero(ir_constant * ir)162 is_greater_than_zero(ir_constant *ir)
163 {
164 assert(ir->type->is_float());
165
166 if (!is_valid_vec_const(ir))
167 return false;
168
169 unsigned component = 0;
170 for (int c = 0; c < ir->type->vector_elements; c++) {
171 if (ir->get_float_component(c) > 0.0f)
172 component++;
173 }
174
175 return (component == ir->type->vector_elements);
176 }
177
178 static void
update_type(ir_expression * ir)179 update_type(ir_expression *ir)
180 {
181 if (ir->operands[0]->type->is_vector())
182 ir->type = ir->operands[0]->type;
183 else
184 ir->type = ir->operands[1]->type;
185 }
186
187 /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
188 static ir_expression *
try_replace_with_dot(ir_expression * expr0,ir_expression * expr1,void * mem_ctx)189 try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx)
190 {
191 if (expr0 && expr0->operation == ir_binop_add &&
192 expr0->type->is_float() &&
193 expr1 && expr1->operation == ir_binop_add &&
194 expr1->type->is_float()) {
195 ir_swizzle *x = expr0->operands[0]->as_swizzle();
196 ir_swizzle *y = expr0->operands[1]->as_swizzle();
197 ir_swizzle *z = expr1->operands[0]->as_swizzle();
198 ir_swizzle *w = expr1->operands[1]->as_swizzle();
199
200 if (!x || x->mask.num_components != 1 ||
201 !y || y->mask.num_components != 1 ||
202 !z || z->mask.num_components != 1 ||
203 !w || w->mask.num_components != 1) {
204 return NULL;
205 }
206
207 bool swiz_seen[4] = {false, false, false, false};
208 swiz_seen[x->mask.x] = true;
209 swiz_seen[y->mask.x] = true;
210 swiz_seen[z->mask.x] = true;
211 swiz_seen[w->mask.x] = true;
212
213 if (!swiz_seen[0] || !swiz_seen[1] ||
214 !swiz_seen[2] || !swiz_seen[3]) {
215 return NULL;
216 }
217
218 if (x->val->equals(y->val) &&
219 x->val->equals(z->val) &&
220 x->val->equals(w->val)) {
221 return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4));
222 }
223 }
224 return NULL;
225 }
226
227 void
reassociate_operands(ir_expression * ir1,int op1,ir_expression * ir2,int op2)228 ir_algebraic_visitor::reassociate_operands(ir_expression *ir1,
229 int op1,
230 ir_expression *ir2,
231 int op2)
232 {
233 ir_rvalue *temp = ir2->operands[op2];
234 ir2->operands[op2] = ir1->operands[op1];
235 ir1->operands[op1] = temp;
236
237 /* Update the type of ir2. The type of ir1 won't have changed --
238 * base types matched, and at least one of the operands of the 2
239 * binops is still a vector if any of them were.
240 */
241 update_type(ir2);
242
243 this->progress = true;
244 }
245
246 /**
247 * Reassociates a constant down a tree of adds or multiplies.
248 *
249 * Consider (2 * (a * (b * 0.5))). We want to end up with a * b.
250 */
251 bool
reassociate_constant(ir_expression * ir1,int const_index,ir_constant * constant,ir_expression * ir2)252 ir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index,
253 ir_constant *constant,
254 ir_expression *ir2)
255 {
256 if (!ir2 || ir1->operation != ir2->operation)
257 return false;
258
259 /* Don't want to even think about matrices. */
260 if (ir1->operands[0]->type->is_matrix() ||
261 ir1->operands[1]->type->is_matrix() ||
262 ir2->operands[0]->type->is_matrix() ||
263 ir2->operands[1]->type->is_matrix())
264 return false;
265
266 void *mem_ctx = ralloc_parent(ir2);
267
268 ir_constant *ir2_const[2];
269 ir2_const[0] = ir2->operands[0]->constant_expression_value(mem_ctx);
270 ir2_const[1] = ir2->operands[1]->constant_expression_value(mem_ctx);
271
272 if (ir2_const[0] && ir2_const[1])
273 return false;
274
275 if (ir2_const[0]) {
276 reassociate_operands(ir1, const_index, ir2, 1);
277 return true;
278 } else if (ir2_const[1]) {
279 reassociate_operands(ir1, const_index, ir2, 0);
280 return true;
281 }
282
283 if (reassociate_constant(ir1, const_index, constant,
284 ir2->operands[0]->as_expression())) {
285 update_type(ir2);
286 return true;
287 }
288
289 if (reassociate_constant(ir1, const_index, constant,
290 ir2->operands[1]->as_expression())) {
291 update_type(ir2);
292 return true;
293 }
294
295 return false;
296 }
297
298 /* When eliminating an expression and just returning one of its operands,
299 * we may need to swizzle that operand out to a vector if the expression was
300 * vector type.
301 */
302 ir_rvalue *
swizzle_if_required(ir_expression * expr,ir_rvalue * operand)303 ir_algebraic_visitor::swizzle_if_required(ir_expression *expr,
304 ir_rvalue *operand)
305 {
306 if (expr->type->is_vector() && operand->type->is_scalar()) {
307 return new(mem_ctx) ir_swizzle(operand, 0, 0, 0, 0,
308 expr->type->vector_elements);
309 } else
310 return operand;
311 }
312
313 ir_rvalue *
handle_expression(ir_expression * ir)314 ir_algebraic_visitor::handle_expression(ir_expression *ir)
315 {
316 ir_constant *op_const[4] = {NULL, NULL, NULL, NULL};
317 ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL};
318
319 if (ir->operation == ir_binop_mul &&
320 ir->operands[0]->type->is_matrix() &&
321 ir->operands[1]->type->is_vector()) {
322 ir_expression *matrix_mul = ir->operands[0]->as_expression();
323
324 if (matrix_mul && matrix_mul->operation == ir_binop_mul &&
325 matrix_mul->operands[0]->type->is_matrix() &&
326 matrix_mul->operands[1]->type->is_matrix()) {
327
328 return mul(matrix_mul->operands[0],
329 mul(matrix_mul->operands[1], ir->operands[1]));
330 }
331 }
332
333 assert(ir->num_operands <= 4);
334 for (unsigned i = 0; i < ir->num_operands; i++) {
335 if (ir->operands[i]->type->is_matrix())
336 return ir;
337
338 op_const[i] =
339 ir->operands[i]->constant_expression_value(ralloc_parent(ir));
340 op_expr[i] = ir->operands[i]->as_expression();
341 }
342
343 if (this->mem_ctx == NULL)
344 this->mem_ctx = ralloc_parent(ir);
345
346 switch (ir->operation) {
347 case ir_unop_bit_not:
348 if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not)
349 return op_expr[0]->operands[0];
350 break;
351
352 case ir_unop_abs:
353 if (op_expr[0] == NULL)
354 break;
355
356 switch (op_expr[0]->operation) {
357 case ir_unop_abs:
358 case ir_unop_neg:
359 return abs(op_expr[0]->operands[0]);
360 default:
361 break;
362 }
363 break;
364
365 case ir_unop_neg:
366 if (op_expr[0] == NULL)
367 break;
368
369 if (op_expr[0]->operation == ir_unop_neg) {
370 return op_expr[0]->operands[0];
371 }
372 break;
373
374 case ir_unop_exp:
375 if (op_expr[0] == NULL)
376 break;
377
378 if (op_expr[0]->operation == ir_unop_log) {
379 return op_expr[0]->operands[0];
380 }
381 break;
382
383 case ir_unop_log:
384 if (op_expr[0] == NULL)
385 break;
386
387 if (op_expr[0]->operation == ir_unop_exp) {
388 return op_expr[0]->operands[0];
389 }
390 break;
391
392 case ir_unop_exp2:
393 if (op_expr[0] == NULL)
394 break;
395
396 if (op_expr[0]->operation == ir_unop_log2) {
397 return op_expr[0]->operands[0];
398 }
399
400 if (!options->EmitNoPow && op_expr[0]->operation == ir_binop_mul) {
401 for (int log2_pos = 0; log2_pos < 2; log2_pos++) {
402 ir_expression *log2_expr =
403 op_expr[0]->operands[log2_pos]->as_expression();
404
405 if (log2_expr && log2_expr->operation == ir_unop_log2) {
406 return new(mem_ctx) ir_expression(ir_binop_pow,
407 ir->type,
408 log2_expr->operands[0],
409 op_expr[0]->operands[1 - log2_pos]);
410 }
411 }
412 }
413 break;
414
415 case ir_unop_log2:
416 if (op_expr[0] == NULL)
417 break;
418
419 if (op_expr[0]->operation == ir_unop_exp2) {
420 return op_expr[0]->operands[0];
421 }
422 break;
423
424 case ir_unop_f2i:
425 case ir_unop_f2u:
426 if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) {
427 return new(mem_ctx) ir_expression(ir->operation,
428 ir->type,
429 op_expr[0]->operands[0]);
430 }
431 break;
432
433 case ir_unop_logic_not: {
434 enum ir_expression_operation new_op = ir_unop_logic_not;
435
436 if (op_expr[0] == NULL)
437 break;
438
439 switch (op_expr[0]->operation) {
440 case ir_binop_less: new_op = ir_binop_gequal; break;
441 case ir_binop_gequal: new_op = ir_binop_less; break;
442 case ir_binop_equal: new_op = ir_binop_nequal; break;
443 case ir_binop_nequal: new_op = ir_binop_equal; break;
444 case ir_binop_all_equal: new_op = ir_binop_any_nequal; break;
445 case ir_binop_any_nequal: new_op = ir_binop_all_equal; break;
446
447 default:
448 /* The default case handler is here to silence a warning from GCC.
449 */
450 break;
451 }
452
453 if (new_op != ir_unop_logic_not) {
454 return new(mem_ctx) ir_expression(new_op,
455 ir->type,
456 op_expr[0]->operands[0],
457 op_expr[0]->operands[1]);
458 }
459
460 break;
461 }
462
463 case ir_unop_saturate:
464 if (op_expr[0] && op_expr[0]->operation == ir_binop_add) {
465 ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression();
466 ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression();
467
468 if (b2f_0 && b2f_0->operation == ir_unop_b2f &&
469 b2f_1 && b2f_1->operation == ir_unop_b2f) {
470 return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0]));
471 }
472 }
473 break;
474
475 /* This macro CANNOT use the do { } while(true) mechanism because
476 * then the breaks apply to the loop instead of the switch!
477 */
478 #define HANDLE_PACK_UNPACK_INVERSE(inverse_operation) \
479 { \
480 ir_expression *const op = ir->operands[0]->as_expression(); \
481 if (op == NULL) \
482 break; \
483 if (op->operation == (inverse_operation)) \
484 return op->operands[0]; \
485 break; \
486 }
487
488 case ir_unop_unpack_uint_2x32:
489 HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_uint_2x32);
490 case ir_unop_pack_uint_2x32:
491 HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_uint_2x32);
492 case ir_unop_unpack_int_2x32:
493 HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_int_2x32);
494 case ir_unop_pack_int_2x32:
495 HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_int_2x32);
496 case ir_unop_unpack_double_2x32:
497 HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_double_2x32);
498 case ir_unop_pack_double_2x32:
499 HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_double_2x32);
500
501 #undef HANDLE_PACK_UNPACK_INVERSE
502
503 case ir_binop_add:
504 if (is_vec_zero(op_const[0]))
505 return ir->operands[1];
506 if (is_vec_zero(op_const[1]))
507 return ir->operands[0];
508
509 /* Reassociate addition of constants so that we can do constant
510 * folding.
511 */
512 if (op_const[0] && !op_const[1])
513 reassociate_constant(ir, 0, op_const[0], op_expr[1]);
514 if (op_const[1] && !op_const[0])
515 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
516
517 /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
518 if (options->OptimizeForAOS) {
519 ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1],
520 mem_ctx);
521 if (expr)
522 return expr;
523 }
524
525 /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a).
526 *
527 * (-x + y) * a + x
528 * (x * -a) + (y * a) + x
529 * x + (x * -a) + (y * a)
530 * x * (1 - a) + y * a
531 * lrp(x, y, a)
532 */
533 for (int mul_pos = 0; mul_pos < 2; mul_pos++) {
534 ir_expression *mul = op_expr[mul_pos];
535
536 if (!mul || mul->operation != ir_binop_mul)
537 continue;
538
539 /* Multiply found on one of the operands. Now check for an
540 * inner addition operation.
541 */
542 for (int inner_add_pos = 0; inner_add_pos < 2; inner_add_pos++) {
543 ir_expression *inner_add =
544 mul->operands[inner_add_pos]->as_expression();
545
546 if (!inner_add || inner_add->operation != ir_binop_add)
547 continue;
548
549 /* Inner addition found on one of the operands. Now check for
550 * one of the operands of the inner addition to be the negative
551 * of x_operand.
552 */
553 for (int neg_pos = 0; neg_pos < 2; neg_pos++) {
554 ir_expression *neg =
555 inner_add->operands[neg_pos]->as_expression();
556
557 if (!neg || neg->operation != ir_unop_neg)
558 continue;
559
560 ir_rvalue *x_operand = ir->operands[1 - mul_pos];
561
562 if (!neg->operands[0]->equals(x_operand))
563 continue;
564
565 ir_rvalue *y_operand = inner_add->operands[1 - neg_pos];
566 ir_rvalue *a_operand = mul->operands[1 - inner_add_pos];
567
568 if (x_operand->type != y_operand->type ||
569 x_operand->type != a_operand->type)
570 continue;
571
572 return lrp(x_operand, y_operand, a_operand);
573 }
574 }
575 }
576
577 break;
578
579 case ir_binop_sub:
580 if (is_vec_zero(op_const[0]))
581 return neg(ir->operands[1]);
582 if (is_vec_zero(op_const[1]))
583 return ir->operands[0];
584 break;
585
586 case ir_binop_mul:
587 if (is_vec_one(op_const[0]))
588 return ir->operands[1];
589 if (is_vec_one(op_const[1]))
590 return ir->operands[0];
591
592 if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
593 return ir_constant::zero(ir, ir->type);
594
595 if (is_vec_negative_one(op_const[0]))
596 return neg(ir->operands[1]);
597 if (is_vec_negative_one(op_const[1]))
598 return neg(ir->operands[0]);
599
600 if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f &&
601 op_expr[1] && op_expr[1]->operation == ir_unop_b2f) {
602 return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0]));
603 }
604
605 /* Reassociate multiplication of constants so that we can do
606 * constant folding.
607 */
608 if (op_const[0] && !op_const[1])
609 reassociate_constant(ir, 0, op_const[0], op_expr[1]);
610 if (op_const[1] && !op_const[0])
611 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
612
613 /* Optimizes
614 *
615 * (mul (floor (add (abs x) 0.5) (sign x)))
616 *
617 * into
618 *
619 * (trunc (add x (mul (sign x) 0.5)))
620 */
621 for (int i = 0; i < 2; i++) {
622 ir_expression *sign_expr = ir->operands[i]->as_expression();
623 ir_expression *floor_expr = ir->operands[1 - i]->as_expression();
624
625 if (!sign_expr || sign_expr->operation != ir_unop_sign ||
626 !floor_expr || floor_expr->operation != ir_unop_floor)
627 continue;
628
629 ir_expression *add_expr = floor_expr->operands[0]->as_expression();
630 if (!add_expr || add_expr->operation != ir_binop_add)
631 continue;
632
633 for (int j = 0; j < 2; j++) {
634 ir_expression *abs_expr = add_expr->operands[j]->as_expression();
635 if (!abs_expr || abs_expr->operation != ir_unop_abs)
636 continue;
637
638 ir_constant *point_five = add_expr->operands[1 - j]->as_constant();
639 if (!point_five || !point_five->is_value(0.5, 0))
640 continue;
641
642 if (abs_expr->operands[0]->equals(sign_expr->operands[0])) {
643 return trunc(add(abs_expr->operands[0],
644 mul(sign_expr, point_five)));
645 }
646 }
647 }
648 break;
649
650 case ir_binop_div:
651 if (is_vec_one(op_const[0]) && (
652 ir->type->is_float() || ir->type->is_double())) {
653 return new(mem_ctx) ir_expression(ir_unop_rcp,
654 ir->operands[1]->type,
655 ir->operands[1],
656 NULL);
657 }
658 if (is_vec_one(op_const[1]))
659 return ir->operands[0];
660 break;
661
662 case ir_binop_dot:
663 if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
664 return ir_constant::zero(mem_ctx, ir->type);
665
666 for (int i = 0; i < 2; i++) {
667 if (!op_const[i])
668 continue;
669
670 unsigned components[4] = { 0 }, count = 0;
671
672 for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) {
673 if (op_const[i]->is_zero())
674 continue;
675
676 components[count] = c;
677 count++;
678 }
679
680 /* No channels had zero values; bail. */
681 if (count >= op_const[i]->type->vector_elements)
682 break;
683
684 ir_expression_operation op = count == 1 ?
685 ir_binop_mul : ir_binop_dot;
686
687 /* Swizzle both operands to remove the channels that were zero. */
688 return new(mem_ctx)
689 ir_expression(op, ir->type,
690 new(mem_ctx) ir_swizzle(ir->operands[0],
691 components, count),
692 new(mem_ctx) ir_swizzle(ir->operands[1],
693 components, count));
694 }
695 break;
696
697 case ir_binop_less:
698 case ir_binop_gequal:
699 case ir_binop_equal:
700 case ir_binop_nequal:
701 for (int add_pos = 0; add_pos < 2; add_pos++) {
702 ir_expression *add = op_expr[add_pos];
703
704 if (!add || add->operation != ir_binop_add)
705 continue;
706
707 ir_constant *zero = op_const[1 - add_pos];
708 if (!is_vec_zero(zero))
709 continue;
710
711 /* Depending of the zero position we want to optimize
712 * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y)
713 */
714 if (add_pos == 1) {
715 return new(mem_ctx) ir_expression(ir->operation,
716 neg(add->operands[0]),
717 add->operands[1]);
718 } else {
719 return new(mem_ctx) ir_expression(ir->operation,
720 add->operands[0],
721 neg(add->operands[1]));
722 }
723 }
724 break;
725
726 case ir_binop_all_equal:
727 case ir_binop_any_nequal:
728 if (ir->operands[0]->type->is_scalar() &&
729 ir->operands[1]->type->is_scalar())
730 return new(mem_ctx) ir_expression(ir->operation == ir_binop_all_equal
731 ? ir_binop_equal : ir_binop_nequal,
732 ir->operands[0],
733 ir->operands[1]);
734 break;
735
736 case ir_binop_rshift:
737 case ir_binop_lshift:
738 /* 0 >> x == 0 */
739 if (is_vec_zero(op_const[0]))
740 return ir->operands[0];
741 /* x >> 0 == x */
742 if (is_vec_zero(op_const[1]))
743 return ir->operands[0];
744 break;
745
746 case ir_binop_logic_and:
747 if (is_vec_one(op_const[0])) {
748 return ir->operands[1];
749 } else if (is_vec_one(op_const[1])) {
750 return ir->operands[0];
751 } else if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
752 return ir_constant::zero(mem_ctx, ir->type);
753 } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not &&
754 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) {
755 /* De Morgan's Law:
756 * (not A) and (not B) === not (A or B)
757 */
758 return logic_not(logic_or(op_expr[0]->operands[0],
759 op_expr[1]->operands[0]));
760 } else if (ir->operands[0]->equals(ir->operands[1])) {
761 /* (a && a) == a */
762 return ir->operands[0];
763 }
764 break;
765
766 case ir_binop_logic_xor:
767 if (is_vec_zero(op_const[0])) {
768 return ir->operands[1];
769 } else if (is_vec_zero(op_const[1])) {
770 return ir->operands[0];
771 } else if (is_vec_one(op_const[0])) {
772 return logic_not(ir->operands[1]);
773 } else if (is_vec_one(op_const[1])) {
774 return logic_not(ir->operands[0]);
775 } else if (ir->operands[0]->equals(ir->operands[1])) {
776 /* (a ^^ a) == false */
777 return ir_constant::zero(mem_ctx, ir->type);
778 }
779 break;
780
781 case ir_binop_logic_or:
782 if (is_vec_zero(op_const[0])) {
783 return ir->operands[1];
784 } else if (is_vec_zero(op_const[1])) {
785 return ir->operands[0];
786 } else if (is_vec_one(op_const[0]) || is_vec_one(op_const[1])) {
787 ir_constant_data data;
788
789 for (unsigned i = 0; i < 16; i++)
790 data.b[i] = true;
791
792 return new(mem_ctx) ir_constant(ir->type, &data);
793 } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not &&
794 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) {
795 /* De Morgan's Law:
796 * (not A) or (not B) === not (A and B)
797 */
798 return logic_not(logic_and(op_expr[0]->operands[0],
799 op_expr[1]->operands[0]));
800 } else if (ir->operands[0]->equals(ir->operands[1])) {
801 /* (a || a) == a */
802 return ir->operands[0];
803 }
804 break;
805
806 case ir_binop_pow:
807 /* 1^x == 1 */
808 if (is_vec_one(op_const[0]))
809 return op_const[0];
810
811 /* x^1 == x */
812 if (is_vec_one(op_const[1]))
813 return ir->operands[0];
814
815 /* pow(2,x) == exp2(x) */
816 if (is_vec_two(op_const[0]))
817 return expr(ir_unop_exp2, ir->operands[1]);
818
819 if (is_vec_two(op_const[1])) {
820 ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x",
821 ir_var_temporary);
822 base_ir->insert_before(x);
823 base_ir->insert_before(assign(x, ir->operands[0]));
824 return mul(x, x);
825 }
826
827 if (is_vec_four(op_const[1])) {
828 ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x",
829 ir_var_temporary);
830 base_ir->insert_before(x);
831 base_ir->insert_before(assign(x, ir->operands[0]));
832
833 ir_variable *squared = new(ir) ir_variable(ir->operands[1]->type,
834 "squared",
835 ir_var_temporary);
836 base_ir->insert_before(squared);
837 base_ir->insert_before(assign(squared, mul(x, x)));
838 return mul(squared, squared);
839 }
840
841 break;
842
843 case ir_binop_min:
844 case ir_binop_max:
845 if (!ir->type->is_float() || options->EmitNoSat)
846 break;
847
848 /* Replace min(max) operations and its commutative combinations with
849 * a saturate operation
850 */
851 for (int op = 0; op < 2; op++) {
852 ir_expression *inner_expr = op_expr[op];
853 ir_constant *outer_const = op_const[1 - op];
854 ir_expression_operation op_cond = (ir->operation == ir_binop_max) ?
855 ir_binop_min : ir_binop_max;
856
857 if (!inner_expr || !outer_const || (inner_expr->operation != op_cond))
858 continue;
859
860 /* One of these has to be a constant */
861 if (!inner_expr->operands[0]->as_constant() &&
862 !inner_expr->operands[1]->as_constant())
863 break;
864
865 /* Found a min(max) combination. Now try to see if its operands
866 * meet our conditions that we can do just a single saturate operation
867 */
868 for (int minmax_op = 0; minmax_op < 2; minmax_op++) {
869 ir_rvalue *x = inner_expr->operands[minmax_op];
870 ir_rvalue *y = inner_expr->operands[1 - minmax_op];
871
872 ir_constant *inner_const = y->as_constant();
873 if (!inner_const)
874 continue;
875
876 /* min(max(x, 0.0), 1.0) is sat(x) */
877 if (ir->operation == ir_binop_min &&
878 inner_const->is_zero() &&
879 outer_const->is_one())
880 return saturate(x);
881
882 /* max(min(x, 1.0), 0.0) is sat(x) */
883 if (ir->operation == ir_binop_max &&
884 inner_const->is_one() &&
885 outer_const->is_zero())
886 return saturate(x);
887
888 /* min(max(x, 0.0), b) where b < 1.0 is sat(min(x, b)) */
889 if (ir->operation == ir_binop_min &&
890 inner_const->is_zero() &&
891 is_less_than_one(outer_const))
892 return saturate(expr(ir_binop_min, x, outer_const));
893
894 /* max(min(x, b), 0.0) where b < 1.0 is sat(min(x, b)) */
895 if (ir->operation == ir_binop_max &&
896 is_less_than_one(inner_const) &&
897 outer_const->is_zero())
898 return saturate(expr(ir_binop_min, x, inner_const));
899
900 /* max(min(x, 1.0), b) where b > 0.0 is sat(max(x, b)) */
901 if (ir->operation == ir_binop_max &&
902 inner_const->is_one() &&
903 is_greater_than_zero(outer_const))
904 return saturate(expr(ir_binop_max, x, outer_const));
905
906 /* min(max(x, b), 1.0) where b > 0.0 is sat(max(x, b)) */
907 if (ir->operation == ir_binop_min &&
908 is_greater_than_zero(inner_const) &&
909 outer_const->is_one())
910 return saturate(expr(ir_binop_max, x, inner_const));
911 }
912 }
913
914 break;
915
916 case ir_unop_rcp:
917 if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp)
918 return op_expr[0]->operands[0];
919
920 if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 ||
921 op_expr[0]->operation == ir_unop_exp)) {
922 return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type,
923 neg(op_expr[0]->operands[0]));
924 }
925
926 /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at
927 * its IR level, so we can always apply this transformation.
928 */
929 if (op_expr[0] && op_expr[0]->operation == ir_unop_rsq)
930 return sqrt(op_expr[0]->operands[0]);
931
932 /* As far as we know, all backends are OK with rsq. */
933 if (op_expr[0] && op_expr[0]->operation == ir_unop_sqrt) {
934 return rsq(op_expr[0]->operands[0]);
935 }
936
937 break;
938
939 case ir_triop_fma:
940 /* Operands are op0 * op1 + op2. */
941 if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
942 return ir->operands[2];
943 } else if (is_vec_zero(op_const[2])) {
944 return mul(ir->operands[0], ir->operands[1]);
945 } else if (is_vec_one(op_const[0])) {
946 return add(ir->operands[1], ir->operands[2]);
947 } else if (is_vec_one(op_const[1])) {
948 return add(ir->operands[0], ir->operands[2]);
949 }
950 break;
951
952 case ir_triop_lrp:
953 /* Operands are (x, y, a). */
954 if (is_vec_zero(op_const[2])) {
955 return ir->operands[0];
956 } else if (is_vec_one(op_const[2])) {
957 return ir->operands[1];
958 } else if (ir->operands[0]->equals(ir->operands[1])) {
959 return ir->operands[0];
960 } else if (is_vec_zero(op_const[0])) {
961 return mul(ir->operands[1], ir->operands[2]);
962 } else if (is_vec_zero(op_const[1])) {
963 unsigned op2_components = ir->operands[2]->type->vector_elements;
964 ir_constant *one;
965
966 switch (ir->type->base_type) {
967 case GLSL_TYPE_FLOAT:
968 one = new(mem_ctx) ir_constant(1.0f, op2_components);
969 break;
970 case GLSL_TYPE_DOUBLE:
971 one = new(mem_ctx) ir_constant(1.0, op2_components);
972 break;
973 default:
974 one = NULL;
975 unreachable("unexpected type");
976 }
977
978 return mul(ir->operands[0], add(one, neg(ir->operands[2])));
979 }
980 break;
981
982 case ir_triop_csel:
983 if (is_vec_one(op_const[0]))
984 return ir->operands[1];
985 if (is_vec_zero(op_const[0]))
986 return ir->operands[2];
987 break;
988
989 /* Remove interpolateAt* instructions for demoted inputs. They are
990 * assigned a constant expression to facilitate this.
991 */
992 case ir_unop_interpolate_at_centroid:
993 case ir_binop_interpolate_at_offset:
994 case ir_binop_interpolate_at_sample:
995 if (op_const[0])
996 return ir->operands[0];
997 break;
998
999 default:
1000 break;
1001 }
1002
1003 return ir;
1004 }
1005
1006 void
handle_rvalue(ir_rvalue ** rvalue)1007 ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue)
1008 {
1009 if (!*rvalue)
1010 return;
1011
1012 ir_expression *expr = (*rvalue)->as_expression();
1013 if (!expr || expr->operation == ir_quadop_vector)
1014 return;
1015
1016 ir_rvalue *new_rvalue = handle_expression(expr);
1017 if (new_rvalue == *rvalue)
1018 return;
1019
1020 /* If the expr used to be some vec OP scalar returning a vector, and the
1021 * optimization gave us back a scalar, we still need to turn it into a
1022 * vector.
1023 */
1024 *rvalue = swizzle_if_required(expr, new_rvalue);
1025
1026 this->progress = true;
1027 }
1028
1029 bool
do_algebraic(exec_list * instructions,bool native_integers,const struct gl_shader_compiler_options * options)1030 do_algebraic(exec_list *instructions, bool native_integers,
1031 const struct gl_shader_compiler_options *options)
1032 {
1033 ir_algebraic_visitor v(native_integers, options);
1034
1035 visit_list_elements(&v, instructions);
1036
1037 return v.progress;
1038 }
1039