1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file opt_copy_propagation_elements.cpp
26 *
27 * Replaces usage of recently-copied components of variables with the
28 * previous copy of the variable.
29 *
30 * This pass can be compared with opt_copy_propagation, which operands
31 * on arbitrary whole-variable copies. However, in order to handle
32 * the copy propagation of swizzled variables or writemasked writes,
33 * we want to track things on a channel-wise basis. I found that
34 * trying to mix the swizzled/writemasked support here with the
35 * whole-variable stuff in opt_copy_propagation.cpp just made a mess,
36 * so this is separate despite the ACP handling being somewhat
37 * similar.
38 *
39 * This should reduce the number of MOV instructions in the generated
40 * programs unless copy propagation is also done on the LIR, and may
41 * help anyway by triggering other optimizations that live in the HIR.
42 */
43
44 #include "ir.h"
45 #include "ir_rvalue_visitor.h"
46 #include "ir_basic_block.h"
47 #include "ir_optimization.h"
48 #include "compiler/glsl_types.h"
49 #include "util/hash_table.h"
50
51 static bool debug = false;
52
53 namespace {
54
55 class acp_entry;
56
57 /* Class that refers to acp_entry in another exec_list. Used
58 * when making removals based on rhs.
59 */
60 class acp_ref : public exec_node
61 {
62 public:
acp_ref(acp_entry * e)63 acp_ref(acp_entry *e)
64 {
65 entry = e;
66 }
67 acp_entry *entry;
68 };
69
70 class acp_entry : public exec_node
71 {
72 public:
73 /* override operator new from exec_node */
74 DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(acp_entry)
75
acp_entry(ir_variable * lhs,ir_variable * rhs,int write_mask,int swizzle[4])76 acp_entry(ir_variable *lhs, ir_variable *rhs, int write_mask, int swizzle[4])
77 : rhs_node(this)
78 {
79 this->lhs = lhs;
80 this->rhs = rhs;
81 this->write_mask = write_mask;
82 memcpy(this->swizzle, swizzle, sizeof(this->swizzle));
83 }
84
85 ir_variable *lhs;
86 ir_variable *rhs;
87 unsigned int write_mask;
88 int swizzle[4];
89 acp_ref rhs_node;
90 };
91
92
93 class kill_entry : public exec_node
94 {
95 public:
96 /* override operator new from exec_node */
97 DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(kill_entry)
98
kill_entry(ir_variable * var,int write_mask)99 kill_entry(ir_variable *var, int write_mask)
100 {
101 this->var = var;
102 this->write_mask = write_mask;
103 }
104
105 ir_variable *var;
106 unsigned int write_mask;
107 };
108
109 class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor {
110 public:
ir_copy_propagation_elements_visitor()111 ir_copy_propagation_elements_visitor()
112 {
113 this->progress = false;
114 this->killed_all = false;
115 this->mem_ctx = ralloc_context(NULL);
116 this->lin_ctx = linear_alloc_parent(this->mem_ctx, 0);
117 this->shader_mem_ctx = NULL;
118 this->kills = new(mem_ctx) exec_list;
119
120 create_acp();
121 }
~ir_copy_propagation_elements_visitor()122 ~ir_copy_propagation_elements_visitor()
123 {
124 ralloc_free(mem_ctx);
125 }
126
create_acp()127 void create_acp()
128 {
129 lhs_ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
130 _mesa_key_pointer_equal);
131 rhs_ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
132 _mesa_key_pointer_equal);
133 }
134
destroy_acp()135 void destroy_acp()
136 {
137 _mesa_hash_table_destroy(lhs_ht, NULL);
138 _mesa_hash_table_destroy(rhs_ht, NULL);
139 }
140
populate_acp(hash_table * lhs,hash_table * rhs)141 void populate_acp(hash_table *lhs, hash_table *rhs)
142 {
143 struct hash_entry *entry;
144
145 hash_table_foreach(lhs, entry) {
146 _mesa_hash_table_insert(lhs_ht, entry->key, entry->data);
147 }
148
149 hash_table_foreach(rhs, entry) {
150 _mesa_hash_table_insert(rhs_ht, entry->key, entry->data);
151 }
152 }
153
154 void handle_loop(ir_loop *, bool keep_acp);
155 virtual ir_visitor_status visit_enter(class ir_loop *);
156 virtual ir_visitor_status visit_enter(class ir_function_signature *);
157 virtual ir_visitor_status visit_leave(class ir_assignment *);
158 virtual ir_visitor_status visit_enter(class ir_call *);
159 virtual ir_visitor_status visit_enter(class ir_if *);
160 virtual ir_visitor_status visit_leave(class ir_swizzle *);
161
162 void handle_rvalue(ir_rvalue **rvalue);
163
164 void add_copy(ir_assignment *ir);
165 void kill(kill_entry *k);
166 void handle_if_block(exec_list *instructions);
167
168 /** Hash of acp_entry: The available copies to propagate */
169 hash_table *lhs_ht;
170 hash_table *rhs_ht;
171
172 /**
173 * List of kill_entry: The variables whose values were killed in this
174 * block.
175 */
176 exec_list *kills;
177
178 bool progress;
179
180 bool killed_all;
181
182 /* Context for our local data structures. */
183 void *mem_ctx;
184 void *lin_ctx;
185 /* Context for allocating new shader nodes. */
186 void *shader_mem_ctx;
187 };
188
189 } /* unnamed namespace */
190
191 ir_visitor_status
visit_enter(ir_function_signature * ir)192 ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
193 {
194 /* Treat entry into a function signature as a completely separate
195 * block. Any instructions at global scope will be shuffled into
196 * main() at link time, so they're irrelevant to us.
197 */
198 exec_list *orig_kills = this->kills;
199 bool orig_killed_all = this->killed_all;
200
201 hash_table *orig_lhs_ht = lhs_ht;
202 hash_table *orig_rhs_ht = rhs_ht;
203
204 this->kills = new(mem_ctx) exec_list;
205 this->killed_all = false;
206
207 create_acp();
208
209 visit_list_elements(this, &ir->body);
210
211 ralloc_free(this->kills);
212
213 destroy_acp();
214
215 this->kills = orig_kills;
216 this->killed_all = orig_killed_all;
217
218 lhs_ht = orig_lhs_ht;
219 rhs_ht = orig_rhs_ht;
220
221 return visit_continue_with_parent;
222 }
223
224 ir_visitor_status
visit_leave(ir_assignment * ir)225 ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
226 {
227 ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
228 ir_variable *var = ir->lhs->variable_referenced();
229
230 if (var->type->is_scalar() || var->type->is_vector()) {
231 kill_entry *k;
232
233 if (lhs)
234 k = new(this->lin_ctx) kill_entry(var, ir->write_mask);
235 else
236 k = new(this->lin_ctx) kill_entry(var, ~0);
237
238 kill(k);
239 }
240
241 add_copy(ir);
242
243 return visit_continue;
244 }
245
246 ir_visitor_status
visit_leave(ir_swizzle *)247 ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *)
248 {
249 /* Don't visit the values of swizzles since they are handled while
250 * visiting the swizzle itself.
251 */
252 return visit_continue;
253 }
254
255 /**
256 * Replaces dereferences of ACP RHS variables with ACP LHS variables.
257 *
258 * This is where the actual copy propagation occurs. Note that the
259 * rewriting of ir_dereference means that the ir_dereference instance
260 * must not be shared by multiple IR operations!
261 */
262 void
handle_rvalue(ir_rvalue ** ir)263 ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir)
264 {
265 int swizzle_chan[4];
266 ir_dereference_variable *deref_var;
267 ir_variable *source[4] = {NULL, NULL, NULL, NULL};
268 int source_chan[4] = {0, 0, 0, 0};
269 int chans;
270 bool noop_swizzle = true;
271
272 if (!*ir)
273 return;
274
275 ir_swizzle *swizzle = (*ir)->as_swizzle();
276 if (swizzle) {
277 deref_var = swizzle->val->as_dereference_variable();
278 if (!deref_var)
279 return;
280
281 swizzle_chan[0] = swizzle->mask.x;
282 swizzle_chan[1] = swizzle->mask.y;
283 swizzle_chan[2] = swizzle->mask.z;
284 swizzle_chan[3] = swizzle->mask.w;
285 chans = swizzle->type->vector_elements;
286 } else {
287 deref_var = (*ir)->as_dereference_variable();
288 if (!deref_var)
289 return;
290
291 swizzle_chan[0] = 0;
292 swizzle_chan[1] = 1;
293 swizzle_chan[2] = 2;
294 swizzle_chan[3] = 3;
295 chans = deref_var->type->vector_elements;
296 }
297
298 if (this->in_assignee)
299 return;
300
301 ir_variable *var = deref_var->var;
302
303 /* Try to find ACP entries covering swizzle_chan[], hoping they're
304 * the same source variable.
305 */
306 hash_entry *ht_entry = _mesa_hash_table_search(lhs_ht, var);
307 if (ht_entry) {
308 exec_list *ht_list = (exec_list *) ht_entry->data;
309 foreach_in_list(acp_entry, entry, ht_list) {
310 for (int c = 0; c < chans; c++) {
311 if (entry->write_mask & (1 << swizzle_chan[c])) {
312 source[c] = entry->rhs;
313 source_chan[c] = entry->swizzle[swizzle_chan[c]];
314
315 if (source_chan[c] != swizzle_chan[c])
316 noop_swizzle = false;
317 }
318 }
319 }
320 }
321
322 /* Make sure all channels are copying from the same source variable. */
323 if (!source[0])
324 return;
325 for (int c = 1; c < chans; c++) {
326 if (source[c] != source[0])
327 return;
328 }
329
330 if (!shader_mem_ctx)
331 shader_mem_ctx = ralloc_parent(deref_var);
332
333 /* Don't pointlessly replace the rvalue with itself (or a noop swizzle
334 * of itself, which would just be deleted by opt_noop_swizzle).
335 */
336 if (source[0] == var && noop_swizzle)
337 return;
338
339 if (debug) {
340 printf("Copy propagation from:\n");
341 (*ir)->print();
342 }
343
344 deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]);
345 *ir = new(shader_mem_ctx) ir_swizzle(deref_var,
346 source_chan[0],
347 source_chan[1],
348 source_chan[2],
349 source_chan[3],
350 chans);
351 progress = true;
352
353 if (debug) {
354 printf("to:\n");
355 (*ir)->print();
356 printf("\n");
357 }
358 }
359
360
361 ir_visitor_status
visit_enter(ir_call * ir)362 ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
363 {
364 /* Do copy propagation on call parameters, but skip any out params */
365 foreach_two_lists(formal_node, &ir->callee->parameters,
366 actual_node, &ir->actual_parameters) {
367 ir_variable *sig_param = (ir_variable *) formal_node;
368 ir_rvalue *ir = (ir_rvalue *) actual_node;
369 if (sig_param->data.mode != ir_var_function_out
370 && sig_param->data.mode != ir_var_function_inout) {
371 ir->accept(this);
372 }
373 }
374
375 /* Since we're unlinked, we don't (necessarily) know the side effects of
376 * this call. So kill all copies.
377 */
378 _mesa_hash_table_clear(lhs_ht, NULL);
379 _mesa_hash_table_clear(rhs_ht, NULL);
380
381 this->killed_all = true;
382
383 return visit_continue_with_parent;
384 }
385
386 void
handle_if_block(exec_list * instructions)387 ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
388 {
389 exec_list *orig_kills = this->kills;
390 bool orig_killed_all = this->killed_all;
391
392 hash_table *orig_lhs_ht = lhs_ht;
393 hash_table *orig_rhs_ht = rhs_ht;
394
395 this->kills = new(mem_ctx) exec_list;
396 this->killed_all = false;
397
398 create_acp();
399
400 /* Populate the initial acp with a copy of the original */
401 populate_acp(orig_lhs_ht, orig_rhs_ht);
402
403 visit_list_elements(this, instructions);
404
405 if (this->killed_all) {
406 _mesa_hash_table_clear(orig_lhs_ht, NULL);
407 _mesa_hash_table_clear(orig_rhs_ht, NULL);
408 }
409
410 exec_list *new_kills = this->kills;
411 this->kills = orig_kills;
412 this->killed_all = this->killed_all || orig_killed_all;
413
414 destroy_acp();
415
416 lhs_ht = orig_lhs_ht;
417 rhs_ht = orig_rhs_ht;
418
419 /* Move the new kills into the parent block's list, removing them
420 * from the parent's ACP list in the process.
421 */
422 foreach_in_list_safe(kill_entry, k, new_kills) {
423 kill(k);
424 }
425
426 ralloc_free(new_kills);
427 }
428
429 ir_visitor_status
visit_enter(ir_if * ir)430 ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir)
431 {
432 ir->condition->accept(this);
433
434 handle_if_block(&ir->then_instructions);
435 handle_if_block(&ir->else_instructions);
436
437 /* handle_if_block() already descended into the children. */
438 return visit_continue_with_parent;
439 }
440
441 void
handle_loop(ir_loop * ir,bool keep_acp)442 ir_copy_propagation_elements_visitor::handle_loop(ir_loop *ir, bool keep_acp)
443 {
444 exec_list *orig_kills = this->kills;
445 bool orig_killed_all = this->killed_all;
446
447 hash_table *orig_lhs_ht = lhs_ht;
448 hash_table *orig_rhs_ht = rhs_ht;
449
450 /* FINISHME: For now, the initial acp for loops is totally empty.
451 * We could go through once, then go through again with the acp
452 * cloned minus the killed entries after the first run through.
453 */
454 this->kills = new(mem_ctx) exec_list;
455 this->killed_all = false;
456
457 create_acp();
458
459 if (keep_acp) {
460 /* Populate the initial acp with a copy of the original */
461 populate_acp(orig_lhs_ht, orig_rhs_ht);
462 }
463
464 visit_list_elements(this, &ir->body_instructions);
465
466 if (this->killed_all) {
467 _mesa_hash_table_clear(orig_lhs_ht, NULL);
468 _mesa_hash_table_clear(orig_rhs_ht, NULL);
469 }
470
471 exec_list *new_kills = this->kills;
472 this->kills = orig_kills;
473 this->killed_all = this->killed_all || orig_killed_all;
474
475 destroy_acp();
476
477 lhs_ht = orig_lhs_ht;
478 rhs_ht = orig_rhs_ht;
479
480 foreach_in_list_safe(kill_entry, k, new_kills) {
481 kill(k);
482 }
483
484 ralloc_free(new_kills);
485 }
486
487 ir_visitor_status
visit_enter(ir_loop * ir)488 ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
489 {
490 handle_loop(ir, false);
491 handle_loop(ir, true);
492
493 /* already descended into the children. */
494 return visit_continue_with_parent;
495 }
496
497 /* Remove any entries currently in the ACP for this kill. */
498 void
kill(kill_entry * k)499 ir_copy_propagation_elements_visitor::kill(kill_entry *k)
500 {
501 /* removal of lhs entries */
502 hash_entry *ht_entry = _mesa_hash_table_search(lhs_ht, k->var);
503 if (ht_entry) {
504 exec_list *lhs_list = (exec_list *) ht_entry->data;
505 foreach_in_list_safe(acp_entry, entry, lhs_list) {
506 entry->write_mask = entry->write_mask & ~k->write_mask;
507 if (entry->write_mask == 0) {
508 entry->remove();
509 continue;
510 }
511 }
512 }
513
514 /* removal of rhs entries */
515 ht_entry = _mesa_hash_table_search(rhs_ht, k->var);
516 if (ht_entry) {
517 exec_list *rhs_list = (exec_list *) ht_entry->data;
518 acp_ref *ref;
519
520 while ((ref = (acp_ref *) rhs_list->pop_head()) != NULL) {
521 acp_entry *entry = ref->entry;
522
523 /* If entry is still in a list (not already removed by lhs entry
524 * removal above), remove it.
525 */
526 if (entry->prev || entry->next)
527 entry->remove();
528 }
529 }
530
531 /* If we were on a list, remove ourselves before inserting */
532 if (k->next)
533 k->remove();
534
535 this->kills->push_tail(k);
536 }
537
538 /**
539 * Adds directly-copied channels between vector variables to the available
540 * copy propagation list.
541 */
542 void
add_copy(ir_assignment * ir)543 ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
544 {
545 acp_entry *entry;
546 int orig_swizzle[4] = {0, 1, 2, 3};
547 int swizzle[4];
548
549 if (ir->condition)
550 return;
551
552 ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
553 if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector()))
554 return;
555
556 ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
557 if (!rhs) {
558 ir_swizzle *swiz = ir->rhs->as_swizzle();
559 if (!swiz)
560 return;
561
562 rhs = swiz->val->as_dereference_variable();
563 if (!rhs)
564 return;
565
566 orig_swizzle[0] = swiz->mask.x;
567 orig_swizzle[1] = swiz->mask.y;
568 orig_swizzle[2] = swiz->mask.z;
569 orig_swizzle[3] = swiz->mask.w;
570 }
571
572 /* Move the swizzle channels out to the positions they match in the
573 * destination. We don't want to have to rewrite the swizzle[]
574 * array every time we clear a bit of the write_mask.
575 */
576 int j = 0;
577 for (int i = 0; i < 4; i++) {
578 if (ir->write_mask & (1 << i))
579 swizzle[i] = orig_swizzle[j++];
580 }
581
582 int write_mask = ir->write_mask;
583 if (lhs->var == rhs->var) {
584 /* If this is a copy from the variable to itself, then we need
585 * to be sure not to include the updated channels from this
586 * instruction in the set of new source channels to be
587 * copy-propagated from.
588 */
589 for (int i = 0; i < 4; i++) {
590 if (ir->write_mask & (1 << orig_swizzle[i]))
591 write_mask &= ~(1 << i);
592 }
593 }
594
595 if (lhs->var->data.precise != rhs->var->data.precise)
596 return;
597
598 entry = new(this->lin_ctx) acp_entry(lhs->var, rhs->var, write_mask,
599 swizzle);
600
601 /* lhs hash, hash of lhs -> acp_entry lists */
602 hash_entry *ht_entry = _mesa_hash_table_search(lhs_ht, lhs->var);
603 if (ht_entry) {
604 exec_list *lhs_list = (exec_list *) ht_entry->data;
605 lhs_list->push_tail(entry);
606 } else {
607 exec_list *lhs_list = new(mem_ctx) exec_list;
608 lhs_list->push_tail(entry);
609 _mesa_hash_table_insert(lhs_ht, lhs->var, lhs_list);
610 }
611
612 /* rhs hash, hash of rhs -> acp_entry pointers to lhs lists */
613 ht_entry = _mesa_hash_table_search(rhs_ht, rhs->var);
614 if (ht_entry) {
615 exec_list *rhs_list = (exec_list *) ht_entry->data;
616 rhs_list->push_tail(&entry->rhs_node);
617 } else {
618 exec_list *rhs_list = new(mem_ctx) exec_list;
619 rhs_list->push_tail(&entry->rhs_node);
620 _mesa_hash_table_insert(rhs_ht, rhs->var, rhs_list);
621 }
622 }
623
624 bool
do_copy_propagation_elements(exec_list * instructions)625 do_copy_propagation_elements(exec_list *instructions)
626 {
627 ir_copy_propagation_elements_visitor v;
628
629 visit_list_elements(&v, instructions);
630
631 return v.progress;
632 }
633