• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file ir_div_to_mul_rcp.cpp
26  *
27  * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
28  *
29  * Many GPUs don't have a divide instruction (945 and 965 included),
30  * but they do have an RCP instruction to compute an approximate
31  * reciprocal.  By breaking the operation down, constant reciprocals
32  * can get constant folded.
33  */
34 
35 #include "ir.h"
36 #include "glsl_types.h"
37 
38 class ir_div_to_mul_rcp_visitor : public ir_hierarchical_visitor {
39 public:
ir_div_to_mul_rcp_visitor()40    ir_div_to_mul_rcp_visitor()
41    {
42       this->made_progress = false;
43    }
44 
45    ir_visitor_status visit_leave(ir_expression *);
46 
47    bool made_progress;
48 };
49 
50 bool
do_div_to_mul_rcp(exec_list * instructions)51 do_div_to_mul_rcp(exec_list *instructions)
52 {
53    ir_div_to_mul_rcp_visitor v;
54 
55    visit_list_elements(&v, instructions);
56    return v.made_progress;
57 }
58 
59 ir_visitor_status
visit_leave(ir_expression * ir)60 ir_div_to_mul_rcp_visitor::visit_leave(ir_expression *ir)
61 {
62    if (ir->operation != ir_binop_div)
63       return visit_continue;
64 
65    if (ir->operands[1]->type->base_type != GLSL_TYPE_INT &&
66        ir->operands[1]->type->base_type != GLSL_TYPE_UINT) {
67       /* New expression for the 1.0 / op1 */
68       ir_rvalue *expr;
69       expr = new(ir) ir_expression(ir_unop_rcp,
70 				   ir->operands[1]->type,
71 				   ir->operands[1],
72 				   NULL);
73 
74       /* op0 / op1 -> op0 * (1.0 / op1) */
75       ir->operation = ir_binop_mul;
76       ir->operands[1] = expr;
77    } else {
78       /* Be careful with integer division -- we need to do it as a
79        * float and re-truncate, since rcp(n > 1) of an integer would
80        * just be 0.
81        */
82       ir_rvalue *op0, *op1;
83       const struct glsl_type *vec_type;
84 
85       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
86 					 ir->operands[1]->type->vector_elements,
87 					 ir->operands[1]->type->matrix_columns);
88 
89       if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
90 	 op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
91       else
92 	 op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
93 
94       op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
95 
96       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
97 					 ir->operands[0]->type->vector_elements,
98 					 ir->operands[0]->type->matrix_columns);
99 
100       if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
101 	 op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
102       else
103 	 op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
104 
105       op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
106 
107       ir->operation = ir_unop_f2i;
108       ir->operands[0] = op0;
109       ir->operands[1] = NULL;
110    }
111 
112    this->made_progress = true;
113 
114    return visit_continue;
115 }
116