1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file opt_tree_grafting.cpp
26 *
27 * Takes assignments to variables that are dereferenced only once and
28 * pastes the RHS expression into where the variable is dereferenced.
29 *
30 * In the process of various operations like function inlining and
31 * tertiary op handling, we'll end up with our expression trees having
32 * been chopped up into a series of assignments of short expressions
33 * to temps. Other passes like ir_algebraic.cpp would prefer to see
34 * the deepest expression trees they can to try to optimize them.
35 *
36 * This is a lot like copy propagaton. In comparison, copy
37 * propagation only acts on plain copies, not arbitrary expressions on
38 * the RHS. Generally, we wouldn't want to go pasting some
39 * complicated expression everywhere it got used, though, so we don't
40 * handle expressions in that pass.
41 *
42 * The hard part is making sure we don't move an expression across
43 * some other assignments that would change the value of the
44 * expression. So we split this into two passes: First, find the
45 * variables in our scope which are written to once and read once, and
46 * then go through basic blocks seeing if we find an opportunity to
47 * move those expressions safely.
48 */
49
50 #include "ir.h"
51 #include "ir_visitor.h"
52 #include "ir_variable_refcount.h"
53 #include "ir_basic_block.h"
54 #include "ir_optimization.h"
55 #include "compiler/glsl_types.h"
56
57 namespace {
58
59 static bool debug = false;
60
61 class ir_tree_grafting_visitor : public ir_hierarchical_visitor {
62 public:
ir_tree_grafting_visitor(ir_assignment * graft_assign,ir_variable * graft_var)63 ir_tree_grafting_visitor(ir_assignment *graft_assign,
64 ir_variable *graft_var)
65 {
66 this->progress = false;
67 this->graft_assign = graft_assign;
68 this->graft_var = graft_var;
69 }
70
71 virtual ir_visitor_status visit_leave(class ir_assignment *);
72 virtual ir_visitor_status visit_enter(class ir_call *);
73 virtual ir_visitor_status visit_enter(class ir_expression *);
74 virtual ir_visitor_status visit_enter(class ir_function *);
75 virtual ir_visitor_status visit_enter(class ir_function_signature *);
76 virtual ir_visitor_status visit_enter(class ir_if *);
77 virtual ir_visitor_status visit_enter(class ir_loop *);
78 virtual ir_visitor_status visit_enter(class ir_swizzle *);
79 virtual ir_visitor_status visit_enter(class ir_texture *);
80
81 ir_visitor_status check_graft(ir_instruction *ir, ir_variable *var);
82
83 bool do_graft(ir_rvalue **rvalue);
84
85 bool progress;
86 ir_variable *graft_var;
87 ir_assignment *graft_assign;
88 };
89
90 struct find_deref_info {
91 ir_variable *var;
92 bool found;
93 };
94
95 void
dereferences_variable_callback(ir_instruction * ir,void * data)96 dereferences_variable_callback(ir_instruction *ir, void *data)
97 {
98 struct find_deref_info *info = (struct find_deref_info *)data;
99 ir_dereference_variable *deref = ir->as_dereference_variable();
100
101 if (deref && deref->var == info->var)
102 info->found = true;
103 }
104
105 static bool
dereferences_variable(ir_instruction * ir,ir_variable * var)106 dereferences_variable(ir_instruction *ir, ir_variable *var)
107 {
108 struct find_deref_info info;
109
110 info.var = var;
111 info.found = false;
112
113 visit_tree(ir, dereferences_variable_callback, &info);
114
115 return info.found;
116 }
117
118 bool
do_graft(ir_rvalue ** rvalue)119 ir_tree_grafting_visitor::do_graft(ir_rvalue **rvalue)
120 {
121 if (!*rvalue)
122 return false;
123
124 ir_dereference_variable *deref = (*rvalue)->as_dereference_variable();
125
126 if (!deref || deref->var != this->graft_var)
127 return false;
128
129 if (debug) {
130 fprintf(stderr, "GRAFTING:\n");
131 this->graft_assign->fprint(stderr);
132 fprintf(stderr, "\n");
133 fprintf(stderr, "TO:\n");
134 (*rvalue)->fprint(stderr);
135 fprintf(stderr, "\n");
136 }
137
138 this->graft_assign->remove();
139 *rvalue = this->graft_assign->rhs;
140
141 this->progress = true;
142 return true;
143 }
144
145 ir_visitor_status
visit_enter(ir_loop * ir)146 ir_tree_grafting_visitor::visit_enter(ir_loop *ir)
147 {
148 (void)ir;
149 /* Do not traverse into the body of the loop since that is a
150 * different basic block.
151 */
152 return visit_stop;
153 }
154
155 /**
156 * Check if we can continue grafting after writing to a variable. If the
157 * expression we're trying to graft references the variable, we must stop.
158 *
159 * \param ir An instruction that writes to a variable.
160 * \param var The variable being updated.
161 */
162 ir_visitor_status
check_graft(ir_instruction * ir,ir_variable * var)163 ir_tree_grafting_visitor::check_graft(ir_instruction *ir, ir_variable *var)
164 {
165 if (dereferences_variable(this->graft_assign->rhs, var)) {
166 if (debug) {
167 fprintf(stderr, "graft killed by: ");
168 ir->fprint(stderr);
169 fprintf(stderr, "\n");
170 }
171 return visit_stop;
172 }
173
174 return visit_continue;
175 }
176
177 ir_visitor_status
visit_leave(ir_assignment * ir)178 ir_tree_grafting_visitor::visit_leave(ir_assignment *ir)
179 {
180 if (do_graft(&ir->rhs) ||
181 do_graft(&ir->condition))
182 return visit_stop;
183
184 /* If this assignment updates a variable used in the assignment
185 * we're trying to graft, then we're done.
186 */
187 return check_graft(ir, ir->lhs->variable_referenced());
188 }
189
190 ir_visitor_status
visit_enter(ir_function * ir)191 ir_tree_grafting_visitor::visit_enter(ir_function *ir)
192 {
193 (void) ir;
194 return visit_continue_with_parent;
195 }
196
197 ir_visitor_status
visit_enter(ir_function_signature * ir)198 ir_tree_grafting_visitor::visit_enter(ir_function_signature *ir)
199 {
200 (void)ir;
201 return visit_continue_with_parent;
202 }
203
204 ir_visitor_status
visit_enter(ir_call * ir)205 ir_tree_grafting_visitor::visit_enter(ir_call *ir)
206 {
207 foreach_two_lists(formal_node, &ir->callee->parameters,
208 actual_node, &ir->actual_parameters) {
209 ir_variable *sig_param = (ir_variable *) formal_node;
210 ir_rvalue *ir = (ir_rvalue *) actual_node;
211 ir_rvalue *new_ir = ir;
212
213 if (sig_param->data.mode != ir_var_function_in
214 && sig_param->data.mode != ir_var_const_in) {
215 if (check_graft(ir, sig_param) == visit_stop)
216 return visit_stop;
217 continue;
218 }
219
220 if (do_graft(&new_ir)) {
221 ir->replace_with(new_ir);
222 return visit_stop;
223 }
224 }
225
226 if (ir->return_deref && check_graft(ir, ir->return_deref->var) == visit_stop)
227 return visit_stop;
228
229 return visit_continue;
230 }
231
232 ir_visitor_status
visit_enter(ir_expression * ir)233 ir_tree_grafting_visitor::visit_enter(ir_expression *ir)
234 {
235 for (unsigned int i = 0; i < ir->num_operands; i++) {
236 if (do_graft(&ir->operands[i]))
237 return visit_stop;
238 }
239
240 return visit_continue;
241 }
242
243 ir_visitor_status
visit_enter(ir_if * ir)244 ir_tree_grafting_visitor::visit_enter(ir_if *ir)
245 {
246 if (do_graft(&ir->condition))
247 return visit_stop;
248
249 /* Do not traverse into the body of the if-statement since that is a
250 * different basic block.
251 */
252 return visit_continue_with_parent;
253 }
254
255 ir_visitor_status
visit_enter(ir_swizzle * ir)256 ir_tree_grafting_visitor::visit_enter(ir_swizzle *ir)
257 {
258 if (do_graft(&ir->val))
259 return visit_stop;
260
261 return visit_continue;
262 }
263
264 ir_visitor_status
visit_enter(ir_texture * ir)265 ir_tree_grafting_visitor::visit_enter(ir_texture *ir)
266 {
267 if (do_graft(&ir->coordinate) ||
268 do_graft(&ir->projector) ||
269 do_graft(&ir->offset) ||
270 do_graft(&ir->shadow_comparator))
271 return visit_stop;
272
273 switch (ir->op) {
274 case ir_tex:
275 case ir_lod:
276 case ir_query_levels:
277 case ir_texture_samples:
278 case ir_samples_identical:
279 break;
280 case ir_txb:
281 if (do_graft(&ir->lod_info.bias))
282 return visit_stop;
283 break;
284 case ir_txf:
285 case ir_txl:
286 case ir_txs:
287 if (do_graft(&ir->lod_info.lod))
288 return visit_stop;
289 break;
290 case ir_txf_ms:
291 if (do_graft(&ir->lod_info.sample_index))
292 return visit_stop;
293 break;
294 case ir_txd:
295 if (do_graft(&ir->lod_info.grad.dPdx) ||
296 do_graft(&ir->lod_info.grad.dPdy))
297 return visit_stop;
298 break;
299 case ir_tg4:
300 if (do_graft(&ir->lod_info.component))
301 return visit_stop;
302 break;
303 }
304
305 return visit_continue;
306 }
307
308 struct tree_grafting_info {
309 ir_variable_refcount_visitor *refs;
310 bool progress;
311 };
312
313 static bool
try_tree_grafting(ir_assignment * start,ir_variable * lhs_var,ir_instruction * bb_last)314 try_tree_grafting(ir_assignment *start,
315 ir_variable *lhs_var,
316 ir_instruction *bb_last)
317 {
318 ir_tree_grafting_visitor v(start, lhs_var);
319
320 if (debug) {
321 fprintf(stderr, "trying to graft: ");
322 lhs_var->fprint(stderr);
323 fprintf(stderr, "\n");
324 }
325
326 for (ir_instruction *ir = (ir_instruction *)start->next;
327 ir != bb_last->next;
328 ir = (ir_instruction *)ir->next) {
329
330 if (debug) {
331 fprintf(stderr, "- ");
332 ir->fprint(stderr);
333 fprintf(stderr, "\n");
334 }
335
336 ir_visitor_status s = ir->accept(&v);
337 if (s == visit_stop)
338 return v.progress;
339 }
340
341 return false;
342 }
343
344 static void
tree_grafting_basic_block(ir_instruction * bb_first,ir_instruction * bb_last,void * data)345 tree_grafting_basic_block(ir_instruction *bb_first,
346 ir_instruction *bb_last,
347 void *data)
348 {
349 struct tree_grafting_info *info = (struct tree_grafting_info *)data;
350 ir_instruction *ir, *next;
351
352 for (ir = bb_first, next = (ir_instruction *)ir->next;
353 ir != bb_last->next;
354 ir = next, next = (ir_instruction *)ir->next) {
355 ir_assignment *assign = ir->as_assignment();
356
357 if (!assign)
358 continue;
359
360 ir_variable *lhs_var = assign->whole_variable_written();
361 if (!lhs_var)
362 continue;
363
364 if (lhs_var->data.mode == ir_var_function_out ||
365 lhs_var->data.mode == ir_var_function_inout ||
366 lhs_var->data.mode == ir_var_shader_out ||
367 lhs_var->data.mode == ir_var_shader_storage ||
368 lhs_var->data.mode == ir_var_shader_shared)
369 continue;
370
371 if (lhs_var->data.precise)
372 continue;
373
374 /* Do not graft sampler and image variables. This is a workaround to
375 * st/glsl_to_tgsi being unable to handle expression parameters to image
376 * intrinsics.
377 *
378 * Note that if this is ever fixed, we still need to skip grafting when
379 * any image layout qualifiers (including the image format) are set,
380 * since we must not lose those.
381 */
382 if (lhs_var->type->is_sampler() || lhs_var->type->is_image())
383 continue;
384
385 ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var);
386
387 if (!entry->declaration ||
388 entry->assigned_count != 1 ||
389 entry->referenced_count != 2)
390 continue;
391
392 /* Found a possibly graftable assignment. Now, walk through the
393 * rest of the BB seeing if the deref is here, and if nothing interfered with
394 * pasting its expression's values in between.
395 */
396 info->progress |= try_tree_grafting(assign, lhs_var, bb_last);
397 }
398 }
399
400 } /* unnamed namespace */
401
402 /**
403 * Does a copy propagation pass on the code present in the instruction stream.
404 */
405 bool
do_tree_grafting(exec_list * instructions)406 do_tree_grafting(exec_list *instructions)
407 {
408 ir_variable_refcount_visitor refs;
409 struct tree_grafting_info info;
410
411 info.progress = false;
412 info.refs = &refs;
413
414 visit_list_elements(info.refs, instructions);
415
416 call_for_basic_blocks(instructions, tree_grafting_basic_block, &info);
417
418 return info.progress;
419 }
420