• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file opt_function_inlining.cpp
26  *
27  * Replaces calls to functions with the body of the function.
28  */
29 
30 #include "ir.h"
31 #include "ir_visitor.h"
32 #include "ir_function_inlining.h"
33 #include "ir_expression_flattening.h"
34 #include "compiler/glsl_types.h"
35 #include "util/hash_table.h"
36 
37 static void
38 do_variable_replacement(exec_list *instructions,
39                         ir_variable *orig,
40                         ir_dereference *repl);
41 
42 namespace {
43 
44 class ir_function_inlining_visitor : public ir_hierarchical_visitor {
45 public:
ir_function_inlining_visitor()46    ir_function_inlining_visitor()
47    {
48       progress = false;
49    }
50 
~ir_function_inlining_visitor()51    virtual ~ir_function_inlining_visitor()
52    {
53       /* empty */
54    }
55 
56    virtual ir_visitor_status visit_enter(ir_expression *);
57    virtual ir_visitor_status visit_enter(ir_call *);
58    virtual ir_visitor_status visit_enter(ir_return *);
59    virtual ir_visitor_status visit_enter(ir_texture *);
60    virtual ir_visitor_status visit_enter(ir_swizzle *);
61 
62    bool progress;
63 };
64 
65 class ir_save_lvalue_visitor : public ir_hierarchical_visitor {
66 public:
67    virtual ir_visitor_status visit_enter(ir_dereference_array *);
68 };
69 
70 } /* unnamed namespace */
71 
72 bool
do_function_inlining(exec_list * instructions)73 do_function_inlining(exec_list *instructions)
74 {
75    ir_function_inlining_visitor v;
76 
77    v.run(instructions);
78 
79    return v.progress;
80 }
81 
82 static void
replace_return_with_assignment(ir_instruction * ir,void * data)83 replace_return_with_assignment(ir_instruction *ir, void *data)
84 {
85    void *ctx = ralloc_parent(ir);
86    ir_dereference *orig_deref = (ir_dereference *) data;
87    ir_return *ret = ir->as_return();
88 
89    if (ret) {
90       if (ret->value) {
91 	 ir_rvalue *lhs = orig_deref->clone(ctx, NULL);
92          ret->replace_with(new(ctx) ir_assignment(lhs, ret->value));
93       } else {
94 	 /* un-valued return has to be the last return, or we shouldn't
95 	  * have reached here. (see can_inline()).
96 	  */
97 	 assert(ret->next->is_tail_sentinel());
98 	 ret->remove();
99       }
100    }
101 }
102 
103 /* Save the given lvalue before the given instruction.
104  *
105  * This is done by adding temporary variables into which the current value
106  * of any array indices are saved, and then modifying the dereference chain
107  * in-place to point to those temporary variables.
108  *
109  * The hierarchical visitor is only used to traverse the left-hand-side chain
110  * of derefs.
111  */
112 ir_visitor_status
visit_enter(ir_dereference_array * deref)113 ir_save_lvalue_visitor::visit_enter(ir_dereference_array *deref)
114 {
115    if (deref->array_index->ir_type != ir_type_constant) {
116       void *ctx = ralloc_parent(deref);
117       ir_variable *index;
118       ir_assignment *assignment;
119 
120       index = new(ctx) ir_variable(deref->array_index->type, "saved_idx", ir_var_temporary);
121       base_ir->insert_before(index);
122 
123       assignment = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(index),
124                                           deref->array_index);
125       base_ir->insert_before(assignment);
126 
127       deref->array_index = new(ctx) ir_dereference_variable(index);
128    }
129 
130    deref->array->accept(this);
131    return visit_stop;
132 }
133 
134 static bool
should_replace_variable(ir_variable * sig_param,ir_rvalue * param)135 should_replace_variable(ir_variable *sig_param, ir_rvalue *param) {
136    /* For opaque types, we want the inlined variable references
137     * referencing the passed in variable, since that will have
138     * the location information, which an assignment of an opaque
139     * variable wouldn't.
140     */
141    return sig_param->type->contains_opaque() &&
142           param->is_dereference() &&
143           sig_param->data.mode == ir_var_function_in;
144 }
145 
146 void
generate_inline(ir_instruction * next_ir)147 ir_call::generate_inline(ir_instruction *next_ir)
148 {
149    void *ctx = ralloc_parent(this);
150    ir_variable **parameters;
151    unsigned num_parameters;
152    int i;
153    struct hash_table *ht;
154 
155    ht = _mesa_pointer_hash_table_create(NULL);
156 
157    num_parameters = this->callee->parameters.length();
158    parameters = new ir_variable *[num_parameters];
159 
160    /* Generate the declarations for the parameters to our inlined code,
161     * and set up the mapping of real function body variables to ours.
162     */
163    i = 0;
164    foreach_two_lists(formal_node, &this->callee->parameters,
165                      actual_node, &this->actual_parameters) {
166       ir_variable *sig_param = (ir_variable *) formal_node;
167       ir_rvalue *param = (ir_rvalue *) actual_node;
168 
169       /* Generate a new variable for the parameter. */
170       if (should_replace_variable(sig_param, param)) {
171          /* Actual replacement happens below */
172 	 parameters[i] = NULL;
173       } else {
174 	 parameters[i] = sig_param->clone(ctx, ht);
175 	 parameters[i]->data.mode = ir_var_temporary;
176 
177 	 /* Remove the read-only decoration because we're going to write
178 	  * directly to this variable.  If the cloned variable is left
179 	  * read-only and the inlined function is inside a loop, the loop
180 	  * analysis code will get confused.
181 	  */
182 	 parameters[i]->data.read_only = false;
183 	 next_ir->insert_before(parameters[i]);
184       }
185 
186       /* Section 6.1.1 (Function Calling Conventions) of the OpenGL Shading
187        * Language 4.5 spec says:
188        *
189        *    "All arguments are evaluated at call time, exactly once, in order,
190        *     from left to right. [...] Evaluation of an out parameter results
191        *     in an l-value that is used to copy out a value when the function
192        *     returns."
193        *
194        * I.e., we have to take temporary copies of any relevant array indices
195        * before the function body is executed.
196        *
197        * This ensures that
198        * (a) if an array index expressions refers to a variable that is
199        *     modified by the execution of the function body, we use the
200        *     original value as intended, and
201        * (b) if an array index expression has side effects, those side effects
202        *     are only executed once and at the right time.
203        */
204       if (parameters[i]) {
205          if (sig_param->data.mode == ir_var_function_in ||
206              sig_param->data.mode == ir_var_const_in) {
207             ir_assignment *assign;
208 
209             assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]),
210                                             param);
211             next_ir->insert_before(assign);
212          } else {
213             assert(sig_param->data.mode == ir_var_function_out ||
214                    sig_param->data.mode == ir_var_function_inout);
215             assert(param->is_lvalue());
216 
217             ir_save_lvalue_visitor v;
218             v.base_ir = next_ir;
219 
220             param->accept(&v);
221 
222             if (sig_param->data.mode == ir_var_function_inout) {
223                ir_assignment *assign;
224 
225                assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]),
226                                                param->clone(ctx, NULL)->as_rvalue());
227                next_ir->insert_before(assign);
228             }
229          }
230       }
231 
232       ++i;
233    }
234 
235    exec_list new_instructions;
236 
237    /* Generate the inlined body of the function to a new list */
238    foreach_in_list(ir_instruction, ir, &callee->body) {
239       ir_instruction *new_ir = ir->clone(ctx, ht);
240 
241       new_instructions.push_tail(new_ir);
242       visit_tree(new_ir, replace_return_with_assignment, this->return_deref);
243    }
244 
245    /* If any opaque types were passed in, replace any deref of the
246     * opaque variable with a deref of the argument.
247     */
248    foreach_two_lists(formal_node, &this->callee->parameters,
249                      actual_node, &this->actual_parameters) {
250       ir_rvalue *const param = (ir_rvalue *) actual_node;
251       ir_variable *sig_param = (ir_variable *) formal_node;
252 
253       if (should_replace_variable(sig_param, param)) {
254 	 ir_dereference *deref = param->as_dereference();
255 
256 	 do_variable_replacement(&new_instructions, sig_param, deref);
257       }
258    }
259 
260    /* Now push those new instructions in. */
261    next_ir->insert_before(&new_instructions);
262 
263    /* Copy back the value of any 'out' parameters from the function body
264     * variables to our own.
265     */
266    i = 0;
267    foreach_two_lists(formal_node, &this->callee->parameters,
268                      actual_node, &this->actual_parameters) {
269       ir_rvalue *const param = (ir_rvalue *) actual_node;
270       const ir_variable *const sig_param = (ir_variable *) formal_node;
271 
272       /* Move our param variable into the actual param if it's an 'out' type. */
273       if (parameters[i] && (sig_param->data.mode == ir_var_function_out ||
274 			    sig_param->data.mode == ir_var_function_inout)) {
275 	 ir_assignment *assign;
276 
277          assign = new(ctx) ir_assignment(param,
278                                          new(ctx) ir_dereference_variable(parameters[i]));
279 	 next_ir->insert_before(assign);
280       }
281 
282       ++i;
283    }
284 
285    delete [] parameters;
286 
287    _mesa_hash_table_destroy(ht, NULL);
288 }
289 
290 
291 ir_visitor_status
visit_enter(ir_expression * ir)292 ir_function_inlining_visitor::visit_enter(ir_expression *ir)
293 {
294    (void) ir;
295    return visit_continue_with_parent;
296 }
297 
298 
299 ir_visitor_status
visit_enter(ir_return * ir)300 ir_function_inlining_visitor::visit_enter(ir_return *ir)
301 {
302    (void) ir;
303    return visit_continue_with_parent;
304 }
305 
306 
307 ir_visitor_status
visit_enter(ir_texture * ir)308 ir_function_inlining_visitor::visit_enter(ir_texture *ir)
309 {
310    (void) ir;
311    return visit_continue_with_parent;
312 }
313 
314 
315 ir_visitor_status
visit_enter(ir_swizzle * ir)316 ir_function_inlining_visitor::visit_enter(ir_swizzle *ir)
317 {
318    (void) ir;
319    return visit_continue_with_parent;
320 }
321 
322 
323 ir_visitor_status
visit_enter(ir_call * ir)324 ir_function_inlining_visitor::visit_enter(ir_call *ir)
325 {
326    if (can_inline(ir)) {
327       ir->generate_inline(ir);
328       ir->remove();
329       this->progress = true;
330    }
331 
332    return visit_continue;
333 }
334 
335 
336 /**
337  * Replaces references to the "orig" variable with a clone of "repl."
338  *
339  * From the spec, opaque types can appear in the tree as function
340  * (non-out) parameters and as the result of array indexing and
341  * structure field selection.  In our builtin implementation, they
342  * also appear in the sampler field of an ir_tex instruction.
343  */
344 
345 class ir_variable_replacement_visitor : public ir_hierarchical_visitor {
346 public:
ir_variable_replacement_visitor(ir_variable * orig,ir_dereference * repl)347    ir_variable_replacement_visitor(ir_variable *orig, ir_dereference *repl)
348    {
349       this->orig = orig;
350       this->repl = repl;
351    }
352 
~ir_variable_replacement_visitor()353    virtual ~ir_variable_replacement_visitor()
354    {
355    }
356 
357    virtual ir_visitor_status visit_leave(ir_call *);
358    virtual ir_visitor_status visit_leave(ir_dereference_array *);
359    virtual ir_visitor_status visit_leave(ir_dereference_record *);
360    virtual ir_visitor_status visit_leave(ir_texture *);
361    virtual ir_visitor_status visit_leave(ir_assignment *);
362    virtual ir_visitor_status visit_leave(ir_expression *);
363    virtual ir_visitor_status visit_leave(ir_return *);
364 
365    void replace_deref(ir_dereference **deref);
366    void replace_rvalue(ir_rvalue **rvalue);
367 
368    ir_variable *orig;
369    ir_dereference *repl;
370 };
371 
372 void
replace_deref(ir_dereference ** deref)373 ir_variable_replacement_visitor::replace_deref(ir_dereference **deref)
374 {
375    ir_dereference_variable *deref_var = (*deref)->as_dereference_variable();
376    if (deref_var && deref_var->var == this->orig) {
377       *deref = this->repl->clone(ralloc_parent(*deref), NULL);
378    }
379 }
380 
381 void
replace_rvalue(ir_rvalue ** rvalue)382 ir_variable_replacement_visitor::replace_rvalue(ir_rvalue **rvalue)
383 {
384    if (!*rvalue)
385       return;
386 
387    ir_dereference *deref = (*rvalue)->as_dereference();
388 
389    if (!deref)
390       return;
391 
392    replace_deref(&deref);
393    *rvalue = deref;
394 }
395 
396 ir_visitor_status
visit_leave(ir_texture * ir)397 ir_variable_replacement_visitor::visit_leave(ir_texture *ir)
398 {
399    replace_deref(&ir->sampler);
400 
401    return visit_continue;
402 }
403 
404 ir_visitor_status
visit_leave(ir_assignment * ir)405 ir_variable_replacement_visitor::visit_leave(ir_assignment *ir)
406 {
407    replace_deref(&ir->lhs);
408    replace_rvalue(&ir->rhs);
409 
410    return visit_continue;
411 }
412 
413 ir_visitor_status
visit_leave(ir_expression * ir)414 ir_variable_replacement_visitor::visit_leave(ir_expression *ir)
415 {
416    for (uint8_t i = 0; i < ir->num_operands; i++)
417       replace_rvalue(&ir->operands[i]);
418 
419    return visit_continue;
420 }
421 
422 ir_visitor_status
visit_leave(ir_return * ir)423 ir_variable_replacement_visitor::visit_leave(ir_return *ir)
424 {
425    replace_rvalue(&ir->value);
426 
427    return visit_continue;
428 }
429 
430 ir_visitor_status
visit_leave(ir_dereference_array * ir)431 ir_variable_replacement_visitor::visit_leave(ir_dereference_array *ir)
432 {
433    replace_rvalue(&ir->array);
434    return visit_continue;
435 }
436 
437 ir_visitor_status
visit_leave(ir_dereference_record * ir)438 ir_variable_replacement_visitor::visit_leave(ir_dereference_record *ir)
439 {
440    replace_rvalue(&ir->record);
441    return visit_continue;
442 }
443 
444 ir_visitor_status
visit_leave(ir_call * ir)445 ir_variable_replacement_visitor::visit_leave(ir_call *ir)
446 {
447    foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) {
448       ir_rvalue *new_param = param;
449       replace_rvalue(&new_param);
450 
451       if (new_param != param) {
452 	 param->replace_with(new_param);
453       }
454    }
455    return visit_continue;
456 }
457 
458 static void
do_variable_replacement(exec_list * instructions,ir_variable * orig,ir_dereference * repl)459 do_variable_replacement(exec_list *instructions,
460                         ir_variable *orig,
461                         ir_dereference *repl)
462 {
463    ir_variable_replacement_visitor v(orig, repl);
464 
465    visit_list_elements(&v, instructions);
466 }
467