1 /*
2 * Copyright © 2017 Gert Wollny
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "st_glsl_to_tgsi_temprename.h"
25 #include "tgsi/tgsi_info.h"
26 #include "tgsi/tgsi_strings.h"
27 #include "program/prog_instruction.h"
28 #include "util/bitscan.h"
29 #include <limits>
30 #include <cstdlib>
31
32 /* std::sort is significantly faster than qsort */
33 #define USE_STL_SORT
34 #ifdef USE_STL_SORT
35 #include <algorithm>
36 #endif
37
38 #ifndef NDEBUG
39 #include <iostream>
40 #include <iomanip>
41 #include "program/prog_print.h"
42 #include "util/debug.h"
43 using std::cerr;
44 using std::setw;
45 #endif
46
47 /* If <windows.h> is included this is defined and clashes with
48 * std::numeric_limits<>::max()
49 */
50 #ifdef max
51 #undef max
52 #endif
53
54 using std::numeric_limits;
55
56 /* Without c++11 define the nullptr for forward-compatibility
57 * and better readibility */
58 #if __cplusplus < 201103L
59 #define nullptr 0
60 #endif
61
62 #ifndef NDEBUG
63 /* Helper function to check whether we want to seen debugging output */
is_debug_enabled()64 static inline bool is_debug_enabled ()
65 {
66 static int debug_enabled = -1;
67 if (debug_enabled < 0)
68 debug_enabled = env_var_as_boolean("GLSL_TO_TGSI_RENAME_DEBUG", false);
69 return debug_enabled > 0;
70 }
71 #define RENAME_DEBUG(X) if (is_debug_enabled()) do { X; } while (false);
72 #else
73 #define RENAME_DEBUG(X)
74 #endif
75
76 namespace {
77
78 enum prog_scope_type {
79 outer_scope, /* Outer program scope */
80 loop_body, /* Inside a loop */
81 if_branch, /* Inside if branch */
82 else_branch, /* Inside else branch */
83 switch_body, /* Inside switch statmenet */
84 switch_case_branch, /* Inside switch case statmenet */
85 switch_default_branch, /* Inside switch default statmenet */
86 undefined_scope
87 };
88
89 class prog_scope {
90 public:
91 prog_scope(prog_scope *parent, prog_scope_type type, int id,
92 int depth, int begin);
93
94 prog_scope_type type() const;
95 prog_scope *parent() const;
96 int nesting_depth() const;
97 int id() const;
98 int end() const;
99 int begin() const;
100 int loop_break_line() const;
101
102 const prog_scope *in_ifelse_scope() const;
103 const prog_scope *in_switchcase_scope() const;
104 const prog_scope *innermost_loop() const;
105 const prog_scope *outermost_loop() const;
106 const prog_scope *enclosing_conditional() const;
107
108 bool is_loop() const;
109 bool is_in_loop() const;
110 bool is_conditional() const;
111
112 bool break_is_for_switchcase() const;
113 bool contains_range_of(const prog_scope& other) const;
114
115 void set_end(int end);
116 void set_loop_break_line(int line);
117
118 private:
119 prog_scope_type scope_type;
120 int scope_id;
121 int scope_nesting_depth;
122 int scope_begin;
123 int scope_end;
124 int break_loop_line;
125 prog_scope *parent_scope;
126 };
127
128 /* Some storage class to encapsulate the prog_scope (de-)allocations */
129 class prog_scope_storage {
130 public:
131 prog_scope_storage(void *mem_ctx, int n);
132 ~prog_scope_storage();
133 prog_scope * create(prog_scope *p, prog_scope_type type, int id,
134 int lvl, int s_begin);
135 private:
136 void *mem_ctx;
137 int current_slot;
138 prog_scope *storage;
139 };
140
141 class temp_comp_access {
142 public:
143 temp_comp_access();
144 void record_read(int line, prog_scope *scope);
145 void record_write(int line, prog_scope *scope);
146 lifetime get_required_lifetime();
147 private:
148 void propagate_lifetime_to_dominant_write_scope();
149
150 prog_scope *last_read_scope;
151 prog_scope *first_read_scope;
152 prog_scope *first_write_scope;
153 int first_write;
154 int last_read;
155 int last_write;
156 int first_read;
157 bool keep_for_full_loop;
158 };
159
160 class temp_access {
161 public:
162 temp_access();
163 void record_read(int line, prog_scope *scope, int swizzle);
164 void record_write(int line, prog_scope *scope, int writemask);
165 lifetime get_required_lifetime();
166 private:
167 void update_access_mask(int mask);
168
169 temp_comp_access comp[4];
170 int access_mask;
171 bool needs_component_tracking;
172 };
173
prog_scope_storage(void * mc,int n)174 prog_scope_storage::prog_scope_storage(void *mc, int n):
175 mem_ctx(mc),
176 current_slot(0)
177 {
178 storage = ralloc_array(mem_ctx, prog_scope, n);
179 }
180
~prog_scope_storage()181 prog_scope_storage::~prog_scope_storage()
182 {
183 ralloc_free(storage);
184 }
185
186 prog_scope*
create(prog_scope * p,prog_scope_type type,int id,int lvl,int s_begin)187 prog_scope_storage::create(prog_scope *p, prog_scope_type type, int id,
188 int lvl, int s_begin)
189 {
190 storage[current_slot] = prog_scope(p, type, id, lvl, s_begin);
191 return &storage[current_slot++];
192 }
193
prog_scope(prog_scope * parent,prog_scope_type type,int id,int depth,int scope_begin)194 prog_scope::prog_scope(prog_scope *parent, prog_scope_type type, int id,
195 int depth, int scope_begin):
196 scope_type(type),
197 scope_id(id),
198 scope_nesting_depth(depth),
199 scope_begin(scope_begin),
200 scope_end(-1),
201 break_loop_line(numeric_limits<int>::max()),
202 parent_scope(parent)
203 {
204 }
205
type() const206 prog_scope_type prog_scope::type() const
207 {
208 return scope_type;
209 }
210
parent() const211 prog_scope *prog_scope::parent() const
212 {
213 return parent_scope;
214 }
215
nesting_depth() const216 int prog_scope::nesting_depth() const
217 {
218 return scope_nesting_depth;
219 }
220
is_loop() const221 bool prog_scope::is_loop() const
222 {
223 return (scope_type == loop_body);
224 }
225
is_in_loop() const226 bool prog_scope::is_in_loop() const
227 {
228 if (scope_type == loop_body)
229 return true;
230
231 if (parent_scope)
232 return parent_scope->is_in_loop();
233
234 return false;
235 }
236
innermost_loop() const237 const prog_scope *prog_scope::innermost_loop() const
238 {
239 if (scope_type == loop_body)
240 return this;
241
242 if (parent_scope)
243 return parent_scope->innermost_loop();
244
245 return nullptr;
246 }
247
outermost_loop() const248 const prog_scope *prog_scope::outermost_loop() const
249 {
250 const prog_scope *loop = nullptr;
251 const prog_scope *p = this;
252
253 do {
254 if (p->type() == loop_body)
255 loop = p;
256 p = p->parent();
257 } while (p);
258
259 return loop;
260 }
261
enclosing_conditional() const262 const prog_scope *prog_scope::enclosing_conditional() const
263 {
264 if (is_conditional())
265 return this;
266
267 if (parent_scope)
268 return parent_scope->enclosing_conditional();
269
270 return nullptr;
271 }
272
contains_range_of(const prog_scope & other) const273 bool prog_scope::contains_range_of(const prog_scope& other) const
274 {
275 return (begin() <= other.begin()) && (end() >= other.end());
276 }
277
is_conditional() const278 bool prog_scope::is_conditional() const
279 {
280 return scope_type == if_branch ||
281 scope_type == else_branch ||
282 scope_type == switch_case_branch ||
283 scope_type == switch_default_branch;
284 }
285
in_ifelse_scope() const286 const prog_scope *prog_scope::in_ifelse_scope() const
287 {
288 if (scope_type == if_branch ||
289 scope_type == else_branch)
290 return this;
291
292 if (parent_scope)
293 return parent_scope->in_ifelse_scope();
294
295 return nullptr;
296 }
297
in_switchcase_scope() const298 const prog_scope *prog_scope::in_switchcase_scope() const
299 {
300 if (scope_type == switch_case_branch ||
301 scope_type == switch_default_branch)
302 return this;
303
304 if (parent_scope)
305 return parent_scope->in_switchcase_scope();
306
307 return nullptr;
308 }
309
break_is_for_switchcase() const310 bool prog_scope::break_is_for_switchcase() const
311 {
312 if (scope_type == loop_body)
313 return false;
314
315 if (scope_type == switch_case_branch ||
316 scope_type == switch_default_branch ||
317 scope_type == switch_body)
318 return true;
319
320 if (parent_scope)
321 return parent_scope->break_is_for_switchcase();
322
323 return false;
324 }
325
id() const326 int prog_scope::id() const
327 {
328 return scope_id;
329 }
330
begin() const331 int prog_scope::begin() const
332 {
333 return scope_begin;
334 }
335
end() const336 int prog_scope::end() const
337 {
338 return scope_end;
339 }
340
set_end(int end)341 void prog_scope::set_end(int end)
342 {
343 if (scope_end == -1)
344 scope_end = end;
345 }
346
set_loop_break_line(int line)347 void prog_scope::set_loop_break_line(int line)
348 {
349 if (scope_type == loop_body) {
350 break_loop_line = MIN2(break_loop_line, line);
351 } else {
352 if (parent_scope)
353 parent()->set_loop_break_line(line);
354 }
355 }
356
loop_break_line() const357 int prog_scope::loop_break_line() const
358 {
359 return break_loop_line;
360 }
361
temp_access()362 temp_access::temp_access():
363 access_mask(0),
364 needs_component_tracking(false)
365 {
366 }
367
update_access_mask(int mask)368 void temp_access::update_access_mask(int mask)
369 {
370 if (access_mask && access_mask != mask)
371 needs_component_tracking = true;
372 access_mask |= mask;
373 }
374
record_write(int line,prog_scope * scope,int writemask)375 void temp_access::record_write(int line, prog_scope *scope, int writemask)
376 {
377 update_access_mask(writemask);
378
379 if (writemask & WRITEMASK_X)
380 comp[0].record_write(line, scope);
381 if (writemask & WRITEMASK_Y)
382 comp[1].record_write(line, scope);
383 if (writemask & WRITEMASK_Z)
384 comp[2].record_write(line, scope);
385 if (writemask & WRITEMASK_W)
386 comp[3].record_write(line, scope);
387 }
388
record_read(int line,prog_scope * scope,int swizzle)389 void temp_access::record_read(int line, prog_scope *scope, int swizzle)
390 {
391 int readmask = 0;
392 for (int idx = 0; idx < 4; ++idx) {
393 int swz = GET_SWZ(swizzle, idx);
394 readmask |= (1 << swz) & 0xF;
395 }
396 update_access_mask(readmask);
397
398 if (readmask & WRITEMASK_X)
399 comp[0].record_read(line, scope);
400 if (readmask & WRITEMASK_Y)
401 comp[1].record_read(line, scope);
402 if (readmask & WRITEMASK_Z)
403 comp[2].record_read(line, scope);
404 if (readmask & WRITEMASK_W)
405 comp[3].record_read(line, scope);
406 }
407
make_lifetime(int b,int e)408 inline static lifetime make_lifetime(int b, int e)
409 {
410 lifetime lt;
411 lt.begin = b;
412 lt.end = e;
413 return lt;
414 }
415
get_required_lifetime()416 lifetime temp_access::get_required_lifetime()
417 {
418 lifetime result = make_lifetime(-1, -1);
419
420 unsigned mask = access_mask;
421 while (mask) {
422 unsigned chan = u_bit_scan(&mask);
423 lifetime lt = comp[chan].get_required_lifetime();
424
425 if (lt.begin >= 0) {
426 if ((result.begin < 0) || (result.begin > lt.begin))
427 result.begin = lt.begin;
428 }
429
430 if (lt.end > result.end)
431 result.end = lt.end;
432
433 if (!needs_component_tracking)
434 break;
435 }
436 return result;
437 }
438
temp_comp_access()439 temp_comp_access::temp_comp_access():
440 last_read_scope(nullptr),
441 first_read_scope(nullptr),
442 first_write_scope(nullptr),
443 first_write(-1),
444 last_read(-1),
445 last_write(-1),
446 first_read(numeric_limits<int>::max())
447 {
448 }
449
record_read(int line,prog_scope * scope)450 void temp_comp_access::record_read(int line, prog_scope *scope)
451 {
452 last_read_scope = scope;
453 last_read = line;
454
455 if (first_read > line) {
456 first_read = line;
457 first_read_scope = scope;
458 }
459 }
460
record_write(int line,prog_scope * scope)461 void temp_comp_access::record_write(int line, prog_scope *scope)
462 {
463 last_write = line;
464
465 if (first_write < 0) {
466 first_write = line;
467 first_write_scope = scope;
468 }
469 }
470
propagate_lifetime_to_dominant_write_scope()471 void temp_comp_access::propagate_lifetime_to_dominant_write_scope()
472 {
473 first_write = first_write_scope->begin();
474 int lr = first_write_scope->end();
475
476 if (last_read < lr)
477 last_read = lr;
478 }
479
get_required_lifetime()480 lifetime temp_comp_access::get_required_lifetime()
481 {
482 bool keep_for_full_loop = false;
483
484 /* This register component is not used at all, or only read,
485 * mark it as unused and ignore it when renaming.
486 * glsl_to_tgsi_visitor::renumber_registers will take care of
487 * eliminating registers that are not written to.
488 */
489 if (last_write < 0)
490 return make_lifetime(-1, -1);
491
492 assert(first_write_scope);
493
494 /* Only written to, just make sure the register component is not
495 * reused in the range it is used to write to
496 */
497 if (!last_read_scope)
498 return make_lifetime(first_write, last_write + 1);
499
500 const prog_scope *enclosing_scope_first_read = first_read_scope;
501 const prog_scope *enclosing_scope_first_write = first_write_scope;
502
503 /* We read before writing in a loop
504 * hence the value must survive the loops
505 */
506 if ((first_read <= first_write) &&
507 first_read_scope->is_in_loop()) {
508 keep_for_full_loop = true;
509 enclosing_scope_first_read = first_read_scope->outermost_loop();
510 }
511
512 /* A conditional write within a nested loop must survive
513 * the outermost loop, but only if it is read outside
514 * the condition scope where we write.
515 */
516 const prog_scope *conditional = enclosing_scope_first_write->enclosing_conditional();
517 if (conditional && conditional->is_in_loop() &&
518 !conditional->contains_range_of(*last_read_scope)) {
519 keep_for_full_loop = true;
520 enclosing_scope_first_write = conditional->outermost_loop();
521 }
522
523 /* Evaluate the scope that is shared by all: required first write scope,
524 * required first read before write scope, and last read scope.
525 */
526 const prog_scope *enclosing_scope = enclosing_scope_first_read;
527 if (enclosing_scope_first_write->contains_range_of(*enclosing_scope))
528 enclosing_scope = enclosing_scope_first_write;
529
530 if (last_read_scope->contains_range_of(*enclosing_scope))
531 enclosing_scope = last_read_scope;
532
533 while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) ||
534 !enclosing_scope->contains_range_of(*last_read_scope)) {
535 enclosing_scope = enclosing_scope->parent();
536 assert(enclosing_scope);
537 }
538
539 /* Propagate the last read scope to the target scope */
540 while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) {
541 /* If the read is in a loop and we have to move up the scope we need to
542 * extend the life time to the end of this current loop because at this
543 * point we don't know whether the component was written before
544 * un-conditionally in the same loop.
545 */
546 if (last_read_scope->is_loop())
547 last_read = last_read_scope->end();
548
549 last_read_scope = last_read_scope->parent();
550 }
551
552 /* If the variable has to be kept for the whole loop, and we
553 * are currently in a loop, then propagate the life time.
554 */
555 if (keep_for_full_loop && first_write_scope->is_loop())
556 propagate_lifetime_to_dominant_write_scope();
557
558 /* Propagate the first_dominant_write scope to the target scope */
559 while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) {
560 /* Propagate lifetime if there was a break in a loop and the write was
561 * after the break inside that loop. Note, that this is only needed if
562 * we move up in the scopes.
563 */
564 if (first_write_scope->loop_break_line() < first_write) {
565 keep_for_full_loop = true;
566 propagate_lifetime_to_dominant_write_scope();
567 }
568
569 first_write_scope = first_write_scope->parent();
570
571 /* Propagte lifetime if we are now in a loop */
572 if (keep_for_full_loop && first_write_scope->is_loop())
573 propagate_lifetime_to_dominant_write_scope();
574 }
575
576 /* The last write past the last read is dead code, but we have to
577 * ensure that the component is not reused too early, hence extend the
578 * lifetime past the last write.
579 */
580 if (last_write >= last_read)
581 last_read = last_write + 1;
582
583 /* Here we are at the same scope, all is resolved */
584 return make_lifetime(first_write, last_read);
585 }
586
587 /* Helper class for sorting and searching the registers based
588 * on life times. */
589 class access_record {
590 public:
591 int begin;
592 int end;
593 int reg;
594 bool erase;
595
operator <(const access_record & rhs) const596 bool operator < (const access_record& rhs) const {
597 return begin < rhs.begin;
598 }
599 };
600
601 }
602
603 #ifndef NDEBUG
604 /* Function used for debugging. */
605 static void dump_instruction(int line, prog_scope *scope,
606 const glsl_to_tgsi_instruction& inst);
607 #endif
608
609 /* Scan the program and estimate the required register life times.
610 * The array lifetimes must be pre-allocated
611 */
612 bool
get_temp_registers_required_lifetimes(void * mem_ctx,exec_list * instructions,int ntemps,struct lifetime * lifetimes)613 get_temp_registers_required_lifetimes(void *mem_ctx, exec_list *instructions,
614 int ntemps, struct lifetime *lifetimes)
615 {
616 int line = 0;
617 int loop_id = 0;
618 int if_id = 0;
619 int switch_id = 0;
620 bool is_at_end = false;
621 bool ok = true;
622 int n_scopes = 1;
623
624 /* Count scopes to allocate the needed space without the need for
625 * re-allocation
626 */
627 foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) {
628 if (inst->op == TGSI_OPCODE_BGNLOOP ||
629 inst->op == TGSI_OPCODE_SWITCH ||
630 inst->op == TGSI_OPCODE_CASE ||
631 inst->op == TGSI_OPCODE_IF ||
632 inst->op == TGSI_OPCODE_UIF ||
633 inst->op == TGSI_OPCODE_ELSE ||
634 inst->op == TGSI_OPCODE_DEFAULT)
635 ++n_scopes;
636 }
637
638 prog_scope_storage scopes(mem_ctx, n_scopes);
639 temp_access *acc = new temp_access[ntemps];
640
641 prog_scope *cur_scope = scopes.create(nullptr, outer_scope, 0, 0, line);
642
643 RENAME_DEBUG(cerr << "========= Begin shader ============\n");
644
645 foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) {
646 if (is_at_end) {
647 assert(!"GLSL_TO_TGSI: shader has instructions past end marker");
648 break;
649 }
650
651 RENAME_DEBUG(dump_instruction(line, cur_scope, *inst));
652
653 switch (inst->op) {
654 case TGSI_OPCODE_BGNLOOP: {
655 cur_scope = scopes.create(cur_scope, loop_body, loop_id++,
656 cur_scope->nesting_depth() + 1, line);
657 break;
658 }
659 case TGSI_OPCODE_ENDLOOP: {
660 cur_scope->set_end(line);
661 cur_scope = cur_scope->parent();
662 assert(cur_scope);
663 break;
664 }
665 case TGSI_OPCODE_IF:
666 case TGSI_OPCODE_UIF: {
667 assert(num_inst_src_regs(inst) == 1);
668 const st_src_reg& src = inst->src[0];
669 if (src.file == PROGRAM_TEMPORARY)
670 acc[src.index].record_read(line, cur_scope, src.swizzle);
671 cur_scope = scopes.create(cur_scope, if_branch, if_id++,
672 cur_scope->nesting_depth() + 1, line + 1);
673 break;
674 }
675 case TGSI_OPCODE_ELSE: {
676 assert(cur_scope->type() == if_branch);
677 cur_scope->set_end(line - 1);
678 cur_scope = scopes.create(cur_scope->parent(), else_branch,
679 cur_scope->id(), cur_scope->nesting_depth(),
680 line + 1);
681 break;
682 }
683 case TGSI_OPCODE_END: {
684 cur_scope->set_end(line);
685 is_at_end = true;
686 break;
687 }
688 case TGSI_OPCODE_ENDIF: {
689 cur_scope->set_end(line - 1);
690 cur_scope = cur_scope->parent();
691 assert(cur_scope);
692 break;
693 }
694 case TGSI_OPCODE_SWITCH: {
695 assert(num_inst_src_regs(inst) == 1);
696 const st_src_reg& src = inst->src[0];
697 prog_scope *scope = scopes.create(cur_scope, switch_body, switch_id++,
698 cur_scope->nesting_depth() + 1, line);
699 /* We record the read only for the SWITCH statement itself, like it
700 * is used by the only consumer of TGSI_OPCODE_SWITCH in tgsi_exec.c.
701 */
702 if (src.file == PROGRAM_TEMPORARY)
703 acc[src.index].record_read(line, cur_scope, src.swizzle);
704 cur_scope = scope;
705 break;
706 }
707 case TGSI_OPCODE_ENDSWITCH: {
708 cur_scope->set_end(line - 1);
709 /* Remove the case level, it might not have been
710 * closed with a break.
711 */
712 if (cur_scope->type() != switch_body)
713 cur_scope = cur_scope->parent();
714
715 cur_scope = cur_scope->parent();
716 assert(cur_scope);
717 break;
718 }
719 case TGSI_OPCODE_CASE: {
720 /* Take care of tracking the registers. */
721 prog_scope *switch_scope = cur_scope->type() == switch_body ?
722 cur_scope : cur_scope->parent();
723
724 assert(num_inst_src_regs(inst) == 1);
725 const st_src_reg& src = inst->src[0];
726 if (src.file == PROGRAM_TEMPORARY)
727 acc[src.index].record_read(line, switch_scope, src.swizzle);
728
729 /* Fall through to allocate the scope. */
730 }
731 case TGSI_OPCODE_DEFAULT: {
732 prog_scope_type t = inst->op == TGSI_OPCODE_CASE ? switch_case_branch
733 : switch_default_branch;
734 prog_scope *switch_scope = (cur_scope->type() == switch_body) ?
735 cur_scope : cur_scope->parent();
736 assert(switch_scope->type() == switch_body);
737 prog_scope *scope = scopes.create(switch_scope, t,
738 switch_scope->id(),
739 switch_scope->nesting_depth() + 1,
740 line);
741 /* Previous case falls through, so scope was not yet closed. */
742 if ((cur_scope != switch_scope) && (cur_scope->end() == -1))
743 cur_scope->set_end(line - 1);
744 cur_scope = scope;
745 break;
746 }
747 case TGSI_OPCODE_BRK: {
748 if (cur_scope->break_is_for_switchcase()) {
749 cur_scope->set_end(line - 1);
750 } else {
751 cur_scope->set_loop_break_line(line);
752 }
753 break;
754 }
755 case TGSI_OPCODE_CAL:
756 case TGSI_OPCODE_RET:
757 /* These opcodes are not supported and if a subroutine would
758 * be called in a shader, then the lifetime tracking would have
759 * to follow that call to see which registers are used there.
760 * Since this is not done, we have to bail out here and signal
761 * that no register merge will take place.
762 */
763 ok = false;
764 goto out;
765 default: {
766 for (unsigned j = 0; j < num_inst_src_regs(inst); j++) {
767 const st_src_reg& src = inst->src[j];
768 if (src.file == PROGRAM_TEMPORARY)
769 acc[src.index].record_read(line, cur_scope, src.swizzle);
770 }
771 for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) {
772 const st_src_reg& src = inst->tex_offsets[j];
773 if (src.file == PROGRAM_TEMPORARY)
774 acc[src.index].record_read(line, cur_scope, src.swizzle);
775 }
776 for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) {
777 const st_dst_reg& dst = inst->dst[j];
778 if (dst.file == PROGRAM_TEMPORARY)
779 acc[dst.index].record_write(line, cur_scope, dst.writemask);
780 }
781 }
782 }
783 ++line;
784 }
785
786 RENAME_DEBUG(cerr << "==================================\n\n");
787
788 /* Make sure last scope is closed, even though no
789 * TGSI_OPCODE_END was given.
790 */
791 if (cur_scope->end() < 0)
792 cur_scope->set_end(line - 1);
793
794 RENAME_DEBUG(cerr << "========= lifetimes ==============\n");
795 for(int i = 0; i < ntemps; ++i) {
796 RENAME_DEBUG(cerr << setw(4) << i);
797 lifetimes[i] = acc[i].get_required_lifetime();
798 RENAME_DEBUG(cerr << ": [" << lifetimes[i].begin << ", "
799 << lifetimes[i].end << "]\n");
800 }
801 RENAME_DEBUG(cerr << "==================================\n\n");
802
803 out:
804 delete[] acc;
805 return ok;
806 }
807
808 /* Find the next register between [start, end) that has a life time starting
809 * at or after bound by using a binary search.
810 * start points at the beginning of the search range,
811 * end points at the element past the end of the search range, and
812 * the array comprising [start, end) must be sorted in ascending order.
813 */
814 static access_record*
find_next_rename(access_record * start,access_record * end,int bound)815 find_next_rename(access_record* start, access_record* end, int bound)
816 {
817 int delta = (end - start);
818
819 while (delta > 0) {
820 int half = delta >> 1;
821 access_record* middle = start + half;
822
823 if (bound <= middle->begin) {
824 delta = half;
825 } else {
826 start = middle;
827 ++start;
828 delta -= half + 1;
829 }
830 }
831
832 return start;
833 }
834
835 #ifndef USE_STL_SORT
access_record_compare(const void * a,const void * b)836 static int access_record_compare (const void *a, const void *b) {
837 const access_record *aa = static_cast<const access_record*>(a);
838 const access_record *bb = static_cast<const access_record*>(b);
839 return aa->begin < bb->begin ? -1 : (aa->begin > bb->begin ? 1 : 0);
840 }
841 #endif
842
843 /* This functions evaluates the register merges by using a binary
844 * search to find suitable merge candidates. */
get_temp_registers_remapping(void * mem_ctx,int ntemps,const struct lifetime * lifetimes,struct rename_reg_pair * result)845 void get_temp_registers_remapping(void *mem_ctx, int ntemps,
846 const struct lifetime* lifetimes,
847 struct rename_reg_pair *result)
848 {
849 access_record *reg_access = ralloc_array(mem_ctx, access_record, ntemps);
850
851 int used_temps = 0;
852 for (int i = 0; i < ntemps; ++i) {
853 if (lifetimes[i].begin >= 0) {
854 reg_access[used_temps].begin = lifetimes[i].begin;
855 reg_access[used_temps].end = lifetimes[i].end;
856 reg_access[used_temps].reg = i;
857 reg_access[used_temps].erase = false;
858 ++used_temps;
859 }
860 }
861
862 #ifdef USE_STL_SORT
863 std::sort(reg_access, reg_access + used_temps);
864 #else
865 std::qsort(reg_access, used_temps, sizeof(access_record), access_record_compare);
866 #endif
867
868 access_record *trgt = reg_access;
869 access_record *reg_access_end = reg_access + used_temps;
870 access_record *first_erase = reg_access_end;
871 access_record *search_start = trgt + 1;
872
873 while (trgt != reg_access_end) {
874 access_record *src = find_next_rename(search_start, reg_access_end,
875 trgt->end);
876 if (src != reg_access_end) {
877 result[src->reg].new_reg = trgt->reg;
878 result[src->reg].valid = true;
879 trgt->end = src->end;
880
881 /* Since we only search forward, don't remove the renamed
882 * register just now, only mark it. */
883 src->erase = true;
884
885 if (first_erase == reg_access_end)
886 first_erase = src;
887
888 search_start = src + 1;
889 } else {
890 /* Moving to the next target register it is time to remove
891 * the already merged registers from the search range */
892 if (first_erase != reg_access_end) {
893 access_record *outp = first_erase;
894 access_record *inp = first_erase + 1;
895
896 while (inp != reg_access_end) {
897 if (!inp->erase)
898 *outp++ = *inp;
899 ++inp;
900 }
901
902 reg_access_end = outp;
903 first_erase = reg_access_end;
904 }
905 ++trgt;
906 search_start = trgt + 1;
907 }
908 }
909 ralloc_free(reg_access);
910 }
911
912 /* Code below used for debugging */
913 #ifndef NDEBUG
914 static const char swizzle_txt[] = "xyzw";
915
916 static const char *tgsi_file_names[PROGRAM_FILE_MAX] = {
917 "TEMP", "ARRAY", "IN", "OUT", "STATE", "CONST",
918 "UNIFORM", "WO", "ADDR", "SAMPLER", "SV", "UNDEF",
919 "IMM", "BUF", "MEM", "IMAGE"
920 };
921
922 static
dump_instruction(int line,prog_scope * scope,const glsl_to_tgsi_instruction & inst)923 void dump_instruction(int line, prog_scope *scope,
924 const glsl_to_tgsi_instruction& inst)
925 {
926 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst.op);
927
928 int indent = scope->nesting_depth();
929 if ((scope->type() == switch_case_branch ||
930 scope->type() == switch_default_branch) &&
931 (info->opcode == TGSI_OPCODE_CASE ||
932 info->opcode == TGSI_OPCODE_DEFAULT))
933 --indent;
934
935 if (info->opcode == TGSI_OPCODE_ENDIF ||
936 info->opcode == TGSI_OPCODE_ELSE ||
937 info->opcode == TGSI_OPCODE_ENDLOOP ||
938 info->opcode == TGSI_OPCODE_ENDSWITCH)
939 --indent;
940
941 cerr << setw(4) << line << ": ";
942 for (int i = 0; i < indent; ++i)
943 cerr << " ";
944 cerr << tgsi_get_opcode_name(info->opcode) << " ";
945
946 bool has_operators = false;
947 for (unsigned j = 0; j < num_inst_dst_regs(&inst); j++) {
948 has_operators = true;
949 if (j > 0)
950 cerr << ", ";
951
952 const st_dst_reg& dst = inst.dst[j];
953 cerr << tgsi_file_names[dst.file];
954
955 if (dst.file == PROGRAM_ARRAY)
956 cerr << "(" << dst.array_id << ")";
957
958 cerr << "[" << dst.index << "]";
959
960 if (dst.writemask != TGSI_WRITEMASK_XYZW) {
961 cerr << ".";
962 if (dst.writemask & TGSI_WRITEMASK_X) cerr << "x";
963 if (dst.writemask & TGSI_WRITEMASK_Y) cerr << "y";
964 if (dst.writemask & TGSI_WRITEMASK_Z) cerr << "z";
965 if (dst.writemask & TGSI_WRITEMASK_W) cerr << "w";
966 }
967 }
968 if (has_operators)
969 cerr << " := ";
970
971 for (unsigned j = 0; j < num_inst_src_regs(&inst); j++) {
972 if (j > 0)
973 cerr << ", ";
974
975 const st_src_reg& src = inst.src[j];
976 cerr << tgsi_file_names[src.file]
977 << "[" << src.index << "]";
978 if (src.swizzle != SWIZZLE_XYZW) {
979 cerr << ".";
980 for (int idx = 0; idx < 4; ++idx) {
981 int swz = GET_SWZ(src.swizzle, idx);
982 if (swz < 4) {
983 cerr << swizzle_txt[swz];
984 }
985 }
986 }
987 }
988
989 if (inst.tex_offset_num_offset > 0) {
990 cerr << ", TEXOFS: ";
991 for (unsigned j = 0; j < inst.tex_offset_num_offset; j++) {
992 if (j > 0)
993 cerr << ", ";
994
995 const st_src_reg& src = inst.tex_offsets[j];
996 cerr << tgsi_file_names[src.file]
997 << "[" << src.index << "]";
998 if (src.swizzle != SWIZZLE_XYZW) {
999 cerr << ".";
1000 for (int idx = 0; idx < 4; ++idx) {
1001 int swz = GET_SWZ(src.swizzle, idx);
1002 if (swz < 4) {
1003 cerr << swizzle_txt[swz];
1004 }
1005 }
1006 }
1007 }
1008 }
1009 cerr << "\n";
1010 }
1011 #endif
1012