1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file opt_algebraic.cpp
26 *
27 * Takes advantage of association, commutivity, and other algebraic
28 * properties to simplify expressions.
29 */
30
31 #include "ir.h"
32 #include "ir_visitor.h"
33 #include "ir_rvalue_visitor.h"
34 #include "ir_optimization.h"
35 #include "ir_builder.h"
36 #include "compiler/glsl_types.h"
37
38 using namespace ir_builder;
39
40 namespace {
41
42 /**
43 * Visitor class for replacing expressions with ir_constant values.
44 */
45
46 class ir_algebraic_visitor : public ir_rvalue_visitor {
47 public:
ir_algebraic_visitor(bool native_integers,const struct gl_shader_compiler_options * options)48 ir_algebraic_visitor(bool native_integers,
49 const struct gl_shader_compiler_options *options)
50 : options(options)
51 {
52 this->progress = false;
53 this->mem_ctx = NULL;
54 this->native_integers = native_integers;
55 }
56
~ir_algebraic_visitor()57 virtual ~ir_algebraic_visitor()
58 {
59 }
60
61 virtual ir_visitor_status visit_enter(ir_assignment *ir);
62
63 ir_rvalue *handle_expression(ir_expression *ir);
64 void handle_rvalue(ir_rvalue **rvalue);
65 bool reassociate_constant(ir_expression *ir1,
66 int const_index,
67 ir_constant *constant,
68 ir_expression *ir2);
69 void reassociate_operands(ir_expression *ir1,
70 int op1,
71 ir_expression *ir2,
72 int op2);
73 ir_rvalue *swizzle_if_required(ir_expression *expr,
74 ir_rvalue *operand);
75
76 const struct gl_shader_compiler_options *options;
77 void *mem_ctx;
78
79 bool native_integers;
80 bool progress;
81 };
82
83 } /* unnamed namespace */
84
85 ir_visitor_status
visit_enter(ir_assignment * ir)86 ir_algebraic_visitor::visit_enter(ir_assignment *ir)
87 {
88 ir_variable *var = ir->lhs->variable_referenced();
89 if (var->data.invariant || var->data.precise) {
90 /* If we're assigning to an invariant or precise variable, just bail.
91 * Most of the algebraic optimizations aren't precision-safe.
92 *
93 * FINISHME: Find out which optimizations are precision-safe and enable
94 * then only for invariant or precise trees.
95 */
96 return visit_continue_with_parent;
97 } else {
98 return visit_continue;
99 }
100 }
101
102 static inline bool
is_vec_zero(ir_constant * ir)103 is_vec_zero(ir_constant *ir)
104 {
105 return (ir == NULL) ? false : ir->is_zero();
106 }
107
108 static inline bool
is_vec_one(ir_constant * ir)109 is_vec_one(ir_constant *ir)
110 {
111 return (ir == NULL) ? false : ir->is_one();
112 }
113
114 static inline bool
is_vec_two(ir_constant * ir)115 is_vec_two(ir_constant *ir)
116 {
117 return (ir == NULL) ? false : ir->is_value(2.0, 2);
118 }
119
120 static inline bool
is_vec_four(ir_constant * ir)121 is_vec_four(ir_constant *ir)
122 {
123 return (ir == NULL) ? false : ir->is_value(4.0, 4);
124 }
125
126 static inline bool
is_vec_negative_one(ir_constant * ir)127 is_vec_negative_one(ir_constant *ir)
128 {
129 return (ir == NULL) ? false : ir->is_negative_one();
130 }
131
132 static inline bool
is_valid_vec_const(ir_constant * ir)133 is_valid_vec_const(ir_constant *ir)
134 {
135 if (ir == NULL)
136 return false;
137
138 if (!ir->type->is_scalar() && !ir->type->is_vector())
139 return false;
140
141 return true;
142 }
143
144 static inline bool
is_less_than_one(ir_constant * ir)145 is_less_than_one(ir_constant *ir)
146 {
147 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
148
149 if (!is_valid_vec_const(ir))
150 return false;
151
152 unsigned component = 0;
153 for (int c = 0; c < ir->type->vector_elements; c++) {
154 if (ir->get_float_component(c) < 1.0f)
155 component++;
156 }
157
158 return (component == ir->type->vector_elements);
159 }
160
161 static inline bool
is_greater_than_zero(ir_constant * ir)162 is_greater_than_zero(ir_constant *ir)
163 {
164 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
165
166 if (!is_valid_vec_const(ir))
167 return false;
168
169 unsigned component = 0;
170 for (int c = 0; c < ir->type->vector_elements; c++) {
171 if (ir->get_float_component(c) > 0.0f)
172 component++;
173 }
174
175 return (component == ir->type->vector_elements);
176 }
177
178 static void
update_type(ir_expression * ir)179 update_type(ir_expression *ir)
180 {
181 if (ir->operands[0]->type->is_vector())
182 ir->type = ir->operands[0]->type;
183 else
184 ir->type = ir->operands[1]->type;
185 }
186
187 /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
188 static ir_expression *
try_replace_with_dot(ir_expression * expr0,ir_expression * expr1,void * mem_ctx)189 try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx)
190 {
191 if (expr0 && expr0->operation == ir_binop_add &&
192 expr0->type->is_float() &&
193 expr1 && expr1->operation == ir_binop_add &&
194 expr1->type->is_float()) {
195 ir_swizzle *x = expr0->operands[0]->as_swizzle();
196 ir_swizzle *y = expr0->operands[1]->as_swizzle();
197 ir_swizzle *z = expr1->operands[0]->as_swizzle();
198 ir_swizzle *w = expr1->operands[1]->as_swizzle();
199
200 if (!x || x->mask.num_components != 1 ||
201 !y || y->mask.num_components != 1 ||
202 !z || z->mask.num_components != 1 ||
203 !w || w->mask.num_components != 1) {
204 return NULL;
205 }
206
207 bool swiz_seen[4] = {false, false, false, false};
208 swiz_seen[x->mask.x] = true;
209 swiz_seen[y->mask.x] = true;
210 swiz_seen[z->mask.x] = true;
211 swiz_seen[w->mask.x] = true;
212
213 if (!swiz_seen[0] || !swiz_seen[1] ||
214 !swiz_seen[2] || !swiz_seen[3]) {
215 return NULL;
216 }
217
218 if (x->val->equals(y->val) &&
219 x->val->equals(z->val) &&
220 x->val->equals(w->val)) {
221 return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4));
222 }
223 }
224 return NULL;
225 }
226
227 void
reassociate_operands(ir_expression * ir1,int op1,ir_expression * ir2,int op2)228 ir_algebraic_visitor::reassociate_operands(ir_expression *ir1,
229 int op1,
230 ir_expression *ir2,
231 int op2)
232 {
233 ir_rvalue *temp = ir2->operands[op2];
234 ir2->operands[op2] = ir1->operands[op1];
235 ir1->operands[op1] = temp;
236
237 /* Update the type of ir2. The type of ir1 won't have changed --
238 * base types matched, and at least one of the operands of the 2
239 * binops is still a vector if any of them were.
240 */
241 update_type(ir2);
242
243 this->progress = true;
244 }
245
246 /**
247 * Reassociates a constant down a tree of adds or multiplies.
248 *
249 * Consider (2 * (a * (b * 0.5))). We want to send up with a * b.
250 */
251 bool
reassociate_constant(ir_expression * ir1,int const_index,ir_constant * constant,ir_expression * ir2)252 ir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index,
253 ir_constant *constant,
254 ir_expression *ir2)
255 {
256 if (!ir2 || ir1->operation != ir2->operation)
257 return false;
258
259 /* Don't want to even think about matrices. */
260 if (ir1->operands[0]->type->is_matrix() ||
261 ir1->operands[1]->type->is_matrix() ||
262 ir2->operands[0]->type->is_matrix() ||
263 ir2->operands[1]->type->is_matrix())
264 return false;
265
266 ir_constant *ir2_const[2];
267 ir2_const[0] = ir2->operands[0]->constant_expression_value();
268 ir2_const[1] = ir2->operands[1]->constant_expression_value();
269
270 if (ir2_const[0] && ir2_const[1])
271 return false;
272
273 if (ir2_const[0]) {
274 reassociate_operands(ir1, const_index, ir2, 1);
275 return true;
276 } else if (ir2_const[1]) {
277 reassociate_operands(ir1, const_index, ir2, 0);
278 return true;
279 }
280
281 if (reassociate_constant(ir1, const_index, constant,
282 ir2->operands[0]->as_expression())) {
283 update_type(ir2);
284 return true;
285 }
286
287 if (reassociate_constant(ir1, const_index, constant,
288 ir2->operands[1]->as_expression())) {
289 update_type(ir2);
290 return true;
291 }
292
293 return false;
294 }
295
296 /* When eliminating an expression and just returning one of its operands,
297 * we may need to swizzle that operand out to a vector if the expression was
298 * vector type.
299 */
300 ir_rvalue *
swizzle_if_required(ir_expression * expr,ir_rvalue * operand)301 ir_algebraic_visitor::swizzle_if_required(ir_expression *expr,
302 ir_rvalue *operand)
303 {
304 if (expr->type->is_vector() && operand->type->is_scalar()) {
305 return new(mem_ctx) ir_swizzle(operand, 0, 0, 0, 0,
306 expr->type->vector_elements);
307 } else
308 return operand;
309 }
310
311 ir_rvalue *
handle_expression(ir_expression * ir)312 ir_algebraic_visitor::handle_expression(ir_expression *ir)
313 {
314 ir_constant *op_const[4] = {NULL, NULL, NULL, NULL};
315 ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL};
316 unsigned int i;
317
318 if (ir->operation == ir_binop_mul &&
319 ir->operands[0]->type->is_matrix() &&
320 ir->operands[1]->type->is_vector()) {
321 ir_expression *matrix_mul = ir->operands[0]->as_expression();
322
323 if (matrix_mul && matrix_mul->operation == ir_binop_mul &&
324 matrix_mul->operands[0]->type->is_matrix() &&
325 matrix_mul->operands[1]->type->is_matrix()) {
326
327 return mul(matrix_mul->operands[0],
328 mul(matrix_mul->operands[1], ir->operands[1]));
329 }
330 }
331
332 assert(ir->get_num_operands() <= 4);
333 for (i = 0; i < ir->get_num_operands(); i++) {
334 if (ir->operands[i]->type->is_matrix())
335 return ir;
336
337 op_const[i] = ir->operands[i]->constant_expression_value();
338 op_expr[i] = ir->operands[i]->as_expression();
339 }
340
341 if (this->mem_ctx == NULL)
342 this->mem_ctx = ralloc_parent(ir);
343
344 switch (ir->operation) {
345 case ir_unop_bit_not:
346 if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not)
347 return op_expr[0]->operands[0];
348 break;
349
350 case ir_unop_abs:
351 if (op_expr[0] == NULL)
352 break;
353
354 switch (op_expr[0]->operation) {
355 case ir_unop_abs:
356 case ir_unop_neg:
357 return abs(op_expr[0]->operands[0]);
358 default:
359 break;
360 }
361 break;
362
363 case ir_unop_neg:
364 if (op_expr[0] == NULL)
365 break;
366
367 if (op_expr[0]->operation == ir_unop_neg) {
368 return op_expr[0]->operands[0];
369 }
370 break;
371
372 case ir_unop_exp:
373 if (op_expr[0] == NULL)
374 break;
375
376 if (op_expr[0]->operation == ir_unop_log) {
377 return op_expr[0]->operands[0];
378 }
379 break;
380
381 case ir_unop_log:
382 if (op_expr[0] == NULL)
383 break;
384
385 if (op_expr[0]->operation == ir_unop_exp) {
386 return op_expr[0]->operands[0];
387 }
388 break;
389
390 case ir_unop_exp2:
391 if (op_expr[0] == NULL)
392 break;
393
394 if (op_expr[0]->operation == ir_unop_log2) {
395 return op_expr[0]->operands[0];
396 }
397
398 if (!options->EmitNoPow && op_expr[0]->operation == ir_binop_mul) {
399 for (int log2_pos = 0; log2_pos < 2; log2_pos++) {
400 ir_expression *log2_expr =
401 op_expr[0]->operands[log2_pos]->as_expression();
402
403 if (log2_expr && log2_expr->operation == ir_unop_log2) {
404 return new(mem_ctx) ir_expression(ir_binop_pow,
405 ir->type,
406 log2_expr->operands[0],
407 op_expr[0]->operands[1 - log2_pos]);
408 }
409 }
410 }
411 break;
412
413 case ir_unop_log2:
414 if (op_expr[0] == NULL)
415 break;
416
417 if (op_expr[0]->operation == ir_unop_exp2) {
418 return op_expr[0]->operands[0];
419 }
420 break;
421
422 case ir_unop_f2i:
423 case ir_unop_f2u:
424 if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) {
425 return new(mem_ctx) ir_expression(ir->operation,
426 ir->type,
427 op_expr[0]->operands[0]);
428 }
429 break;
430
431 case ir_unop_logic_not: {
432 enum ir_expression_operation new_op = ir_unop_logic_not;
433
434 if (op_expr[0] == NULL)
435 break;
436
437 switch (op_expr[0]->operation) {
438 case ir_binop_less: new_op = ir_binop_gequal; break;
439 case ir_binop_greater: new_op = ir_binop_lequal; break;
440 case ir_binop_lequal: new_op = ir_binop_greater; break;
441 case ir_binop_gequal: new_op = ir_binop_less; break;
442 case ir_binop_equal: new_op = ir_binop_nequal; break;
443 case ir_binop_nequal: new_op = ir_binop_equal; break;
444 case ir_binop_all_equal: new_op = ir_binop_any_nequal; break;
445 case ir_binop_any_nequal: new_op = ir_binop_all_equal; break;
446
447 default:
448 /* The default case handler is here to silence a warning from GCC.
449 */
450 break;
451 }
452
453 if (new_op != ir_unop_logic_not) {
454 return new(mem_ctx) ir_expression(new_op,
455 ir->type,
456 op_expr[0]->operands[0],
457 op_expr[0]->operands[1]);
458 }
459
460 break;
461 }
462
463 case ir_unop_saturate:
464 if (op_expr[0] && op_expr[0]->operation == ir_binop_add) {
465 ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression();
466 ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression();
467
468 if (b2f_0 && b2f_0->operation == ir_unop_b2f &&
469 b2f_1 && b2f_1->operation == ir_unop_b2f) {
470 return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0]));
471 }
472 }
473 break;
474
475 case ir_binop_add:
476 if (is_vec_zero(op_const[0]))
477 return ir->operands[1];
478 if (is_vec_zero(op_const[1]))
479 return ir->operands[0];
480
481 /* Reassociate addition of constants so that we can do constant
482 * folding.
483 */
484 if (op_const[0] && !op_const[1])
485 reassociate_constant(ir, 0, op_const[0], op_expr[1]);
486 if (op_const[1] && !op_const[0])
487 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
488
489 /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
490 if (options->OptimizeForAOS) {
491 ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1],
492 mem_ctx);
493 if (expr)
494 return expr;
495 }
496
497 /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a).
498 *
499 * (-x + y) * a + x
500 * (x * -a) + (y * a) + x
501 * x + (x * -a) + (y * a)
502 * x * (1 - a) + y * a
503 * lrp(x, y, a)
504 */
505 for (int mul_pos = 0; mul_pos < 2; mul_pos++) {
506 ir_expression *mul = op_expr[mul_pos];
507
508 if (!mul || mul->operation != ir_binop_mul)
509 continue;
510
511 /* Multiply found on one of the operands. Now check for an
512 * inner addition operation.
513 */
514 for (int inner_add_pos = 0; inner_add_pos < 2; inner_add_pos++) {
515 ir_expression *inner_add =
516 mul->operands[inner_add_pos]->as_expression();
517
518 if (!inner_add || inner_add->operation != ir_binop_add)
519 continue;
520
521 /* Inner addition found on one of the operands. Now check for
522 * one of the operands of the inner addition to be the negative
523 * of x_operand.
524 */
525 for (int neg_pos = 0; neg_pos < 2; neg_pos++) {
526 ir_expression *neg =
527 inner_add->operands[neg_pos]->as_expression();
528
529 if (!neg || neg->operation != ir_unop_neg)
530 continue;
531
532 ir_rvalue *x_operand = ir->operands[1 - mul_pos];
533
534 if (!neg->operands[0]->equals(x_operand))
535 continue;
536
537 ir_rvalue *y_operand = inner_add->operands[1 - neg_pos];
538 ir_rvalue *a_operand = mul->operands[1 - inner_add_pos];
539
540 if (x_operand->type != y_operand->type ||
541 x_operand->type != a_operand->type)
542 continue;
543
544 return lrp(x_operand, y_operand, a_operand);
545 }
546 }
547 }
548
549 break;
550
551 case ir_binop_sub:
552 if (is_vec_zero(op_const[0]))
553 return neg(ir->operands[1]);
554 if (is_vec_zero(op_const[1]))
555 return ir->operands[0];
556 break;
557
558 case ir_binop_mul:
559 if (is_vec_one(op_const[0]))
560 return ir->operands[1];
561 if (is_vec_one(op_const[1]))
562 return ir->operands[0];
563
564 if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
565 return ir_constant::zero(ir, ir->type);
566
567 if (is_vec_negative_one(op_const[0]))
568 return neg(ir->operands[1]);
569 if (is_vec_negative_one(op_const[1]))
570 return neg(ir->operands[0]);
571
572 if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f &&
573 op_expr[1] && op_expr[1]->operation == ir_unop_b2f) {
574 return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0]));
575 }
576
577 /* Reassociate multiplication of constants so that we can do
578 * constant folding.
579 */
580 if (op_const[0] && !op_const[1])
581 reassociate_constant(ir, 0, op_const[0], op_expr[1]);
582 if (op_const[1] && !op_const[0])
583 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
584
585 /* Optimizes
586 *
587 * (mul (floor (add (abs x) 0.5) (sign x)))
588 *
589 * into
590 *
591 * (trunc (add x (mul (sign x) 0.5)))
592 */
593 for (int i = 0; i < 2; i++) {
594 ir_expression *sign_expr = ir->operands[i]->as_expression();
595 ir_expression *floor_expr = ir->operands[1 - i]->as_expression();
596
597 if (!sign_expr || sign_expr->operation != ir_unop_sign ||
598 !floor_expr || floor_expr->operation != ir_unop_floor)
599 continue;
600
601 ir_expression *add_expr = floor_expr->operands[0]->as_expression();
602 if (!add_expr || add_expr->operation != ir_binop_add)
603 continue;
604
605 for (int j = 0; j < 2; j++) {
606 ir_expression *abs_expr = add_expr->operands[j]->as_expression();
607 if (!abs_expr || abs_expr->operation != ir_unop_abs)
608 continue;
609
610 ir_constant *point_five = add_expr->operands[1 - j]->as_constant();
611 if (!point_five || !point_five->is_value(0.5, 0))
612 continue;
613
614 if (abs_expr->operands[0]->equals(sign_expr->operands[0])) {
615 return trunc(add(abs_expr->operands[0],
616 mul(sign_expr, point_five)));
617 }
618 }
619 }
620 break;
621
622 case ir_binop_div:
623 if (is_vec_one(op_const[0]) && (
624 ir->type->base_type == GLSL_TYPE_FLOAT ||
625 ir->type->base_type == GLSL_TYPE_DOUBLE)) {
626 return new(mem_ctx) ir_expression(ir_unop_rcp,
627 ir->operands[1]->type,
628 ir->operands[1],
629 NULL);
630 }
631 if (is_vec_one(op_const[1]))
632 return ir->operands[0];
633 break;
634
635 case ir_binop_dot:
636 if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
637 return ir_constant::zero(mem_ctx, ir->type);
638
639 for (int i = 0; i < 2; i++) {
640 if (!op_const[i])
641 continue;
642
643 unsigned components[4] = { 0 }, count = 0;
644
645 for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) {
646 if (op_const[i]->is_zero())
647 continue;
648
649 components[count] = c;
650 count++;
651 }
652
653 /* No channels had zero values; bail. */
654 if (count >= op_const[i]->type->vector_elements)
655 break;
656
657 ir_expression_operation op = count == 1 ?
658 ir_binop_mul : ir_binop_dot;
659
660 /* Swizzle both operands to remove the channels that were zero. */
661 return new(mem_ctx)
662 ir_expression(op, ir->type,
663 new(mem_ctx) ir_swizzle(ir->operands[0],
664 components, count),
665 new(mem_ctx) ir_swizzle(ir->operands[1],
666 components, count));
667 }
668 break;
669
670 case ir_binop_less:
671 case ir_binop_lequal:
672 case ir_binop_greater:
673 case ir_binop_gequal:
674 case ir_binop_equal:
675 case ir_binop_nequal:
676 for (int add_pos = 0; add_pos < 2; add_pos++) {
677 ir_expression *add = op_expr[add_pos];
678
679 if (!add || add->operation != ir_binop_add)
680 continue;
681
682 ir_constant *zero = op_const[1 - add_pos];
683 if (!is_vec_zero(zero))
684 continue;
685
686 /* Depending of the zero position we want to optimize
687 * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y)
688 */
689 if (add_pos == 1) {
690 return new(mem_ctx) ir_expression(ir->operation,
691 neg(add->operands[0]),
692 add->operands[1]);
693 } else {
694 return new(mem_ctx) ir_expression(ir->operation,
695 add->operands[0],
696 neg(add->operands[1]));
697 }
698 }
699 break;
700
701 case ir_binop_all_equal:
702 case ir_binop_any_nequal:
703 if (ir->operands[0]->type->is_scalar() &&
704 ir->operands[1]->type->is_scalar())
705 return new(mem_ctx) ir_expression(ir->operation == ir_binop_all_equal
706 ? ir_binop_equal : ir_binop_nequal,
707 ir->operands[0],
708 ir->operands[1]);
709 break;
710
711 case ir_binop_rshift:
712 case ir_binop_lshift:
713 /* 0 >> x == 0 */
714 if (is_vec_zero(op_const[0]))
715 return ir->operands[0];
716 /* x >> 0 == x */
717 if (is_vec_zero(op_const[1]))
718 return ir->operands[0];
719 break;
720
721 case ir_binop_logic_and:
722 if (is_vec_one(op_const[0])) {
723 return ir->operands[1];
724 } else if (is_vec_one(op_const[1])) {
725 return ir->operands[0];
726 } else if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
727 return ir_constant::zero(mem_ctx, ir->type);
728 } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not &&
729 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) {
730 /* De Morgan's Law:
731 * (not A) and (not B) === not (A or B)
732 */
733 return logic_not(logic_or(op_expr[0]->operands[0],
734 op_expr[1]->operands[0]));
735 } else if (ir->operands[0]->equals(ir->operands[1])) {
736 /* (a && a) == a */
737 return ir->operands[0];
738 }
739 break;
740
741 case ir_binop_logic_xor:
742 if (is_vec_zero(op_const[0])) {
743 return ir->operands[1];
744 } else if (is_vec_zero(op_const[1])) {
745 return ir->operands[0];
746 } else if (is_vec_one(op_const[0])) {
747 return logic_not(ir->operands[1]);
748 } else if (is_vec_one(op_const[1])) {
749 return logic_not(ir->operands[0]);
750 } else if (ir->operands[0]->equals(ir->operands[1])) {
751 /* (a ^^ a) == false */
752 return ir_constant::zero(mem_ctx, ir->type);
753 }
754 break;
755
756 case ir_binop_logic_or:
757 if (is_vec_zero(op_const[0])) {
758 return ir->operands[1];
759 } else if (is_vec_zero(op_const[1])) {
760 return ir->operands[0];
761 } else if (is_vec_one(op_const[0]) || is_vec_one(op_const[1])) {
762 ir_constant_data data;
763
764 for (unsigned i = 0; i < 16; i++)
765 data.b[i] = true;
766
767 return new(mem_ctx) ir_constant(ir->type, &data);
768 } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not &&
769 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) {
770 /* De Morgan's Law:
771 * (not A) or (not B) === not (A and B)
772 */
773 return logic_not(logic_and(op_expr[0]->operands[0],
774 op_expr[1]->operands[0]));
775 } else if (ir->operands[0]->equals(ir->operands[1])) {
776 /* (a || a) == a */
777 return ir->operands[0];
778 }
779 break;
780
781 case ir_binop_pow:
782 /* 1^x == 1 */
783 if (is_vec_one(op_const[0]))
784 return op_const[0];
785
786 /* x^1 == x */
787 if (is_vec_one(op_const[1]))
788 return ir->operands[0];
789
790 /* pow(2,x) == exp2(x) */
791 if (is_vec_two(op_const[0]))
792 return expr(ir_unop_exp2, ir->operands[1]);
793
794 if (is_vec_two(op_const[1])) {
795 ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x",
796 ir_var_temporary);
797 base_ir->insert_before(x);
798 base_ir->insert_before(assign(x, ir->operands[0]));
799 return mul(x, x);
800 }
801
802 if (is_vec_four(op_const[1])) {
803 ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x",
804 ir_var_temporary);
805 base_ir->insert_before(x);
806 base_ir->insert_before(assign(x, ir->operands[0]));
807
808 ir_variable *squared = new(ir) ir_variable(ir->operands[1]->type,
809 "squared",
810 ir_var_temporary);
811 base_ir->insert_before(squared);
812 base_ir->insert_before(assign(squared, mul(x, x)));
813 return mul(squared, squared);
814 }
815
816 break;
817
818 case ir_binop_min:
819 case ir_binop_max:
820 if (ir->type->base_type != GLSL_TYPE_FLOAT || options->EmitNoSat)
821 break;
822
823 /* Replace min(max) operations and its commutative combinations with
824 * a saturate operation
825 */
826 for (int op = 0; op < 2; op++) {
827 ir_expression *inner_expr = op_expr[op];
828 ir_constant *outer_const = op_const[1 - op];
829 ir_expression_operation op_cond = (ir->operation == ir_binop_max) ?
830 ir_binop_min : ir_binop_max;
831
832 if (!inner_expr || !outer_const || (inner_expr->operation != op_cond))
833 continue;
834
835 /* One of these has to be a constant */
836 if (!inner_expr->operands[0]->as_constant() &&
837 !inner_expr->operands[1]->as_constant())
838 break;
839
840 /* Found a min(max) combination. Now try to see if its operands
841 * meet our conditions that we can do just a single saturate operation
842 */
843 for (int minmax_op = 0; minmax_op < 2; minmax_op++) {
844 ir_rvalue *x = inner_expr->operands[minmax_op];
845 ir_rvalue *y = inner_expr->operands[1 - minmax_op];
846
847 ir_constant *inner_const = y->as_constant();
848 if (!inner_const)
849 continue;
850
851 /* min(max(x, 0.0), 1.0) is sat(x) */
852 if (ir->operation == ir_binop_min &&
853 inner_const->is_zero() &&
854 outer_const->is_one())
855 return saturate(x);
856
857 /* max(min(x, 1.0), 0.0) is sat(x) */
858 if (ir->operation == ir_binop_max &&
859 inner_const->is_one() &&
860 outer_const->is_zero())
861 return saturate(x);
862
863 /* min(max(x, 0.0), b) where b < 1.0 is sat(min(x, b)) */
864 if (ir->operation == ir_binop_min &&
865 inner_const->is_zero() &&
866 is_less_than_one(outer_const))
867 return saturate(expr(ir_binop_min, x, outer_const));
868
869 /* max(min(x, b), 0.0) where b < 1.0 is sat(min(x, b)) */
870 if (ir->operation == ir_binop_max &&
871 is_less_than_one(inner_const) &&
872 outer_const->is_zero())
873 return saturate(expr(ir_binop_min, x, inner_const));
874
875 /* max(min(x, 1.0), b) where b > 0.0 is sat(max(x, b)) */
876 if (ir->operation == ir_binop_max &&
877 inner_const->is_one() &&
878 is_greater_than_zero(outer_const))
879 return saturate(expr(ir_binop_max, x, outer_const));
880
881 /* min(max(x, b), 1.0) where b > 0.0 is sat(max(x, b)) */
882 if (ir->operation == ir_binop_min &&
883 is_greater_than_zero(inner_const) &&
884 outer_const->is_one())
885 return saturate(expr(ir_binop_max, x, inner_const));
886 }
887 }
888
889 break;
890
891 case ir_unop_rcp:
892 if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp)
893 return op_expr[0]->operands[0];
894
895 if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 ||
896 op_expr[0]->operation == ir_unop_exp)) {
897 return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type,
898 neg(op_expr[0]->operands[0]));
899 }
900
901 /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at
902 * its IR level, so we can always apply this transformation.
903 */
904 if (op_expr[0] && op_expr[0]->operation == ir_unop_rsq)
905 return sqrt(op_expr[0]->operands[0]);
906
907 /* As far as we know, all backends are OK with rsq. */
908 if (op_expr[0] && op_expr[0]->operation == ir_unop_sqrt) {
909 return rsq(op_expr[0]->operands[0]);
910 }
911
912 break;
913
914 case ir_triop_fma:
915 /* Operands are op0 * op1 + op2. */
916 if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
917 return ir->operands[2];
918 } else if (is_vec_zero(op_const[2])) {
919 return mul(ir->operands[0], ir->operands[1]);
920 } else if (is_vec_one(op_const[0])) {
921 return add(ir->operands[1], ir->operands[2]);
922 } else if (is_vec_one(op_const[1])) {
923 return add(ir->operands[0], ir->operands[2]);
924 }
925 break;
926
927 case ir_triop_lrp:
928 /* Operands are (x, y, a). */
929 if (is_vec_zero(op_const[2])) {
930 return ir->operands[0];
931 } else if (is_vec_one(op_const[2])) {
932 return ir->operands[1];
933 } else if (ir->operands[0]->equals(ir->operands[1])) {
934 return ir->operands[0];
935 } else if (is_vec_zero(op_const[0])) {
936 return mul(ir->operands[1], ir->operands[2]);
937 } else if (is_vec_zero(op_const[1])) {
938 unsigned op2_components = ir->operands[2]->type->vector_elements;
939 ir_constant *one;
940
941 switch (ir->type->base_type) {
942 case GLSL_TYPE_FLOAT:
943 one = new(mem_ctx) ir_constant(1.0f, op2_components);
944 break;
945 case GLSL_TYPE_DOUBLE:
946 one = new(mem_ctx) ir_constant(1.0, op2_components);
947 break;
948 default:
949 one = NULL;
950 unreachable("unexpected type");
951 }
952
953 return mul(ir->operands[0], add(one, neg(ir->operands[2])));
954 }
955 break;
956
957 case ir_triop_csel:
958 if (is_vec_one(op_const[0]))
959 return ir->operands[1];
960 if (is_vec_zero(op_const[0]))
961 return ir->operands[2];
962 break;
963
964 /* Remove interpolateAt* instructions for demoted inputs. They are
965 * assigned a constant expression to facilitate this.
966 */
967 case ir_unop_interpolate_at_centroid:
968 case ir_binop_interpolate_at_offset:
969 case ir_binop_interpolate_at_sample:
970 if (op_const[0])
971 return ir->operands[0];
972 break;
973
974 default:
975 break;
976 }
977
978 return ir;
979 }
980
981 void
handle_rvalue(ir_rvalue ** rvalue)982 ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue)
983 {
984 if (!*rvalue)
985 return;
986
987 ir_expression *expr = (*rvalue)->as_expression();
988 if (!expr || expr->operation == ir_quadop_vector)
989 return;
990
991 ir_rvalue *new_rvalue = handle_expression(expr);
992 if (new_rvalue == *rvalue)
993 return;
994
995 /* If the expr used to be some vec OP scalar returning a vector, and the
996 * optimization gave us back a scalar, we still need to turn it into a
997 * vector.
998 */
999 *rvalue = swizzle_if_required(expr, new_rvalue);
1000
1001 this->progress = true;
1002 }
1003
1004 bool
do_algebraic(exec_list * instructions,bool native_integers,const struct gl_shader_compiler_options * options)1005 do_algebraic(exec_list *instructions, bool native_integers,
1006 const struct gl_shader_compiler_options *options)
1007 {
1008 ir_algebraic_visitor v(native_integers, options);
1009
1010 visit_list_elements(&v, instructions);
1011
1012 return v.progress;
1013 }
1014