• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file opt_function_inlining.cpp
26  *
27  * Replaces calls to functions with the body of the function.
28  */
29 
30 #include "ir.h"
31 #include "ir_visitor.h"
32 #include "ir_rvalue_visitor.h"
33 #include "ir_function_inlining.h"
34 #include "ir_expression_flattening.h"
35 #include "compiler/glsl_types.h"
36 #include "util/hash_table.h"
37 
38 static void
39 do_variable_replacement(exec_list *instructions,
40                         ir_variable *orig,
41                         ir_rvalue *repl);
42 
43 namespace {
44 
45 class ir_function_inlining_visitor : public ir_hierarchical_visitor {
46 public:
ir_function_inlining_visitor()47    ir_function_inlining_visitor()
48    {
49       progress = false;
50    }
51 
~ir_function_inlining_visitor()52    virtual ~ir_function_inlining_visitor()
53    {
54       /* empty */
55    }
56 
57    virtual ir_visitor_status visit_enter(ir_expression *);
58    virtual ir_visitor_status visit_enter(ir_call *);
59    virtual ir_visitor_status visit_enter(ir_return *);
60    virtual ir_visitor_status visit_enter(ir_texture *);
61    virtual ir_visitor_status visit_enter(ir_swizzle *);
62 
63    bool progress;
64 };
65 
66 class ir_save_lvalue_visitor : public ir_hierarchical_visitor {
67 public:
68    virtual ir_visitor_status visit_enter(ir_dereference_array *);
69 };
70 
71 } /* unnamed namespace */
72 
73 bool
do_function_inlining(exec_list * instructions)74 do_function_inlining(exec_list *instructions)
75 {
76    ir_function_inlining_visitor v;
77 
78    v.run(instructions);
79 
80    return v.progress;
81 }
82 
83 static void
replace_return_with_assignment(ir_instruction * ir,void * data)84 replace_return_with_assignment(ir_instruction *ir, void *data)
85 {
86    void *ctx = ralloc_parent(ir);
87    ir_dereference *orig_deref = (ir_dereference *) data;
88    ir_return *ret = ir->as_return();
89 
90    if (ret) {
91       if (ret->value) {
92 	 ir_rvalue *lhs = orig_deref->clone(ctx, NULL);
93          ret->replace_with(new(ctx) ir_assignment(lhs, ret->value));
94       } else {
95 	 /* un-valued return has to be the last return, or we shouldn't
96 	  * have reached here. (see can_inline()).
97 	  */
98 	 assert(ret->next->is_tail_sentinel());
99 	 ret->remove();
100       }
101    }
102 }
103 
104 /* Save the given lvalue before the given instruction.
105  *
106  * This is done by adding temporary variables into which the current value
107  * of any array indices are saved, and then modifying the dereference chain
108  * in-place to point to those temporary variables.
109  *
110  * The hierarchical visitor is only used to traverse the left-hand-side chain
111  * of derefs.
112  */
113 ir_visitor_status
visit_enter(ir_dereference_array * deref)114 ir_save_lvalue_visitor::visit_enter(ir_dereference_array *deref)
115 {
116    if (deref->array_index->ir_type != ir_type_constant) {
117       void *ctx = ralloc_parent(deref);
118       ir_variable *index;
119       ir_assignment *assignment;
120 
121       index = new(ctx) ir_variable(deref->array_index->type, "saved_idx", ir_var_temporary);
122       base_ir->insert_before(index);
123 
124       assignment = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(index),
125                                           deref->array_index);
126       base_ir->insert_before(assignment);
127 
128       deref->array_index = new(ctx) ir_dereference_variable(index);
129    }
130 
131    deref->array->accept(this);
132    return visit_stop;
133 }
134 
135 static bool
should_replace_variable(ir_variable * sig_param,ir_rvalue * param,bool is_builtin)136 should_replace_variable(ir_variable *sig_param, ir_rvalue *param,
137                         bool is_builtin) {
138 
139    if (sig_param->data.mode != ir_var_function_in &&
140        sig_param->data.mode != ir_var_const_in)
141       return false;
142 
143    /* Some places in glsl_to_nir() expect images to always be copied to a temp
144     * first.
145     */
146    if (glsl_type_is_image(glsl_without_array(sig_param->type)) && !param->is_dereference())
147       return false;
148 
149    /* SSBO and shared vars might be passed to a built-in such as an atomic
150     * memory function, where copying these to a temp before passing to the
151     * atomic function is not valid so we must replace these instead. Also,
152     * shader inputs for interpolateAt funtions also need to be replaced.
153     *
154     * Our builtins should always use temps and not the inputs themselves to
155     * store temporay values so just checking is_builtin rather than string
156     * comparing the function name for e.g atomic* should always be safe.
157     */
158    if (is_builtin)
159       return true;
160 
161    /* For opaque types, we want the inlined variable references
162     * referencing the passed in variable, since that will have
163     * the location information, which an assignment of an opaque
164     * variable wouldn't.
165     */
166    return glsl_contains_opaque(sig_param->type);
167 }
168 
169 void
generate_inline(ir_instruction * next_ir)170 ir_call::generate_inline(ir_instruction *next_ir)
171 {
172    void *ctx = ralloc_parent(this);
173    ir_variable **parameters;
174    unsigned num_parameters;
175    int i;
176    struct hash_table *ht;
177 
178    ht = _mesa_pointer_hash_table_create(NULL);
179 
180    num_parameters = this->callee->parameters.length();
181    parameters = new ir_variable *[num_parameters];
182 
183    /* Generate the declarations for the parameters to our inlined code,
184     * and set up the mapping of real function body variables to ours.
185     */
186    i = 0;
187    foreach_two_lists(formal_node, &this->callee->parameters,
188                      actual_node, &this->actual_parameters) {
189       ir_variable *sig_param = (ir_variable *) formal_node;
190       ir_rvalue *param = (ir_rvalue *) actual_node;
191 
192       /* Generate a new variable for the parameter. */
193       if (should_replace_variable(sig_param, param,
194                                   this->callee->is_builtin())) {
195          /* Actual replacement happens below */
196 	 parameters[i] = NULL;
197       } else {
198 	 parameters[i] = sig_param->clone(ctx, ht);
199 	 parameters[i]->data.mode = ir_var_temporary;
200 
201 	 /* Remove the read-only decoration because we're going to write
202 	  * directly to this variable.  If the cloned variable is left
203 	  * read-only and the inlined function is inside a loop, the loop
204 	  * analysis code will get confused.
205 	  */
206 	 parameters[i]->data.read_only = false;
207 	 next_ir->insert_before(parameters[i]);
208       }
209 
210       /* Section 6.1.1 (Function Calling Conventions) of the OpenGL Shading
211        * Language 4.5 spec says:
212        *
213        *    "All arguments are evaluated at call time, exactly once, in order,
214        *     from left to right. [...] Evaluation of an out parameter results
215        *     in an l-value that is used to copy out a value when the function
216        *     returns."
217        *
218        * I.e., we have to take temporary copies of any relevant array indices
219        * before the function body is executed.
220        *
221        * This ensures that
222        * (a) if an array index expressions refers to a variable that is
223        *     modified by the execution of the function body, we use the
224        *     original value as intended, and
225        * (b) if an array index expression has side effects, those side effects
226        *     are only executed once and at the right time.
227        */
228       if (parameters[i]) {
229          if (sig_param->data.mode == ir_var_function_in ||
230              sig_param->data.mode == ir_var_const_in) {
231             ir_assignment *assign;
232 
233             assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]),
234                                             param);
235             next_ir->insert_before(assign);
236          } else {
237             assert(sig_param->data.mode == ir_var_function_out ||
238                    sig_param->data.mode == ir_var_function_inout);
239             assert(param->is_lvalue());
240 
241             ir_save_lvalue_visitor v;
242             v.base_ir = next_ir;
243 
244             param->accept(&v);
245 
246             if (sig_param->data.mode == ir_var_function_inout) {
247                ir_assignment *assign;
248 
249                assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]),
250                                                param->clone(ctx, NULL)->as_rvalue());
251                next_ir->insert_before(assign);
252             }
253          }
254       }
255 
256       ++i;
257    }
258 
259    exec_list new_instructions;
260 
261    /* Generate the inlined body of the function to a new list */
262    foreach_in_list(ir_instruction, ir, &callee->body) {
263       ir_instruction *new_ir = ir->clone(ctx, ht);
264 
265       new_instructions.push_tail(new_ir);
266       visit_tree(new_ir, replace_return_with_assignment, this->return_deref);
267    }
268 
269    /* If any opaque types were passed in, replace any deref of the
270     * opaque variable with a deref of the argument.
271     */
272    foreach_two_lists(formal_node, &this->callee->parameters,
273                      actual_node, &this->actual_parameters) {
274       ir_rvalue *const param = (ir_rvalue *) actual_node;
275       ir_variable *sig_param = (ir_variable *) formal_node;
276 
277       if (should_replace_variable(sig_param, param,
278                                   this->callee->is_builtin())) {
279          do_variable_replacement(&new_instructions, sig_param, param);
280       }
281    }
282 
283    /* Now push those new instructions in. */
284    next_ir->insert_before(&new_instructions);
285 
286    /* Copy back the value of any 'out' parameters from the function body
287     * variables to our own.
288     */
289    i = 0;
290    foreach_two_lists(formal_node, &this->callee->parameters,
291                      actual_node, &this->actual_parameters) {
292       ir_rvalue *const param = (ir_rvalue *) actual_node;
293       const ir_variable *const sig_param = (ir_variable *) formal_node;
294 
295       /* Move our param variable into the actual param if it's an 'out' type. */
296       if (parameters[i] && (sig_param->data.mode == ir_var_function_out ||
297 			    sig_param->data.mode == ir_var_function_inout)) {
298 	 ir_assignment *assign;
299 
300          assign = new(ctx) ir_assignment(param,
301                                          new(ctx) ir_dereference_variable(parameters[i]));
302 	 next_ir->insert_before(assign);
303       }
304 
305       ++i;
306    }
307 
308    delete [] parameters;
309 
310    _mesa_hash_table_destroy(ht, NULL);
311 }
312 
313 
314 ir_visitor_status
visit_enter(ir_expression * ir)315 ir_function_inlining_visitor::visit_enter(ir_expression *ir)
316 {
317    (void) ir;
318    return visit_continue_with_parent;
319 }
320 
321 
322 ir_visitor_status
visit_enter(ir_return * ir)323 ir_function_inlining_visitor::visit_enter(ir_return *ir)
324 {
325    (void) ir;
326    return visit_continue_with_parent;
327 }
328 
329 
330 ir_visitor_status
visit_enter(ir_texture * ir)331 ir_function_inlining_visitor::visit_enter(ir_texture *ir)
332 {
333    (void) ir;
334    return visit_continue_with_parent;
335 }
336 
337 
338 ir_visitor_status
visit_enter(ir_swizzle * ir)339 ir_function_inlining_visitor::visit_enter(ir_swizzle *ir)
340 {
341    (void) ir;
342    return visit_continue_with_parent;
343 }
344 
345 
346 ir_visitor_status
visit_enter(ir_call * ir)347 ir_function_inlining_visitor::visit_enter(ir_call *ir)
348 {
349    if (can_inline(ir)) {
350       ir->generate_inline(ir);
351       ir->remove();
352       this->progress = true;
353    }
354 
355    return visit_continue;
356 }
357 
358 
359 /**
360  * Replaces references to the "orig" variable with a clone of "repl."
361  *
362  * From the spec, opaque types can appear in the tree as function
363  * (non-out) parameters and as the result of array indexing and
364  * structure field selection.  In our builtin implementation, they
365  * also appear in the sampler field of an ir_tex instruction.
366  */
367 
368 class ir_variable_replacement_visitor : public ir_rvalue_visitor {
369 public:
ir_variable_replacement_visitor(ir_variable * orig,ir_rvalue * repl)370    ir_variable_replacement_visitor(ir_variable *orig, ir_rvalue *repl)
371    {
372       this->orig = orig;
373       this->repl = repl;
374    }
375 
~ir_variable_replacement_visitor()376    virtual ~ir_variable_replacement_visitor()
377    {
378    }
379 
380    virtual ir_visitor_status visit_leave(ir_call *);
381    virtual ir_visitor_status visit_leave(ir_texture *);
382    virtual ir_visitor_status visit_leave(ir_assignment *);
383 
384    void handle_rvalue(ir_rvalue **rvalue);
385    void replace_deref(ir_dereference **deref);
386    void replace_rvalue(ir_rvalue **rvalue);
387 
388    ir_variable *orig;
389    ir_rvalue *repl;
390 };
391 
392 void
replace_deref(ir_dereference ** deref)393 ir_variable_replacement_visitor::replace_deref(ir_dereference **deref)
394 {
395    ir_dereference_variable *deref_var = (*deref)->as_dereference_variable();
396    if (deref_var && deref_var->var == this->orig)
397       *deref = this->repl->as_dereference()->clone(ralloc_parent(*deref), NULL);
398 }
399 
400 void
handle_rvalue(ir_rvalue ** rvalue)401 ir_variable_replacement_visitor::handle_rvalue(ir_rvalue **rvalue)
402 {
403    replace_rvalue(rvalue);
404 }
405 
406 void
replace_rvalue(ir_rvalue ** rvalue)407 ir_variable_replacement_visitor::replace_rvalue(ir_rvalue **rvalue)
408 {
409    if (!*rvalue)
410       return;
411 
412    ir_dereference *deref = (*rvalue)->as_dereference();
413 
414    if (!deref)
415       return;
416 
417    ir_dereference_variable *deref_var = (deref)->as_dereference_variable();
418    if (deref_var && deref_var->var == this->orig)
419       *rvalue = this->repl->clone(ralloc_parent(deref), NULL);
420 }
421 
422 ir_visitor_status
visit_leave(ir_texture * ir)423 ir_variable_replacement_visitor::visit_leave(ir_texture *ir)
424 {
425    replace_deref(&ir->sampler);
426 
427    return rvalue_visit(ir);
428 }
429 
430 ir_visitor_status
visit_leave(ir_assignment * ir)431 ir_variable_replacement_visitor::visit_leave(ir_assignment *ir)
432 {
433    replace_deref(&ir->lhs);
434    replace_rvalue(&ir->rhs);
435 
436    return visit_continue;
437 }
438 
439 ir_visitor_status
visit_leave(ir_call * ir)440 ir_variable_replacement_visitor::visit_leave(ir_call *ir)
441 {
442    foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) {
443       ir_rvalue *new_param = param;
444       replace_rvalue(&new_param);
445 
446       if (new_param != param) {
447          param->replace_with(new_param);
448       }
449    }
450    return visit_continue;
451 }
452 
453 static void
do_variable_replacement(exec_list * instructions,ir_variable * orig,ir_rvalue * repl)454 do_variable_replacement(exec_list *instructions,
455                         ir_variable *orig,
456                         ir_rvalue *repl)
457 {
458    ir_variable_replacement_visitor v(orig, repl);
459 
460    visit_list_elements(&v, instructions);
461 }
462