• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2017 Gert Wollny
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "st_glsl_to_tgsi_temprename.h"
25 #include "tgsi/tgsi_info.h"
26 #include "tgsi/tgsi_strings.h"
27 #include "program/prog_instruction.h"
28 #include "util/bitscan.h"
29 #include <limits>
30 #include <cstdlib>
31 
32 /* std::sort is significantly faster than qsort */
33 #define USE_STL_SORT
34 #ifdef USE_STL_SORT
35 #include <algorithm>
36 #endif
37 
38 #ifndef NDEBUG
39 #include <iostream>
40 #include <iomanip>
41 #include "program/prog_print.h"
42 #include "util/debug.h"
43 using std::cerr;
44 using std::setw;
45 #endif
46 
47 /* If <windows.h> is included this is defined and clashes with
48  * std::numeric_limits<>::max()
49  */
50 #ifdef max
51 #undef max
52 #endif
53 
54 using std::numeric_limits;
55 
56 /* Without c++11 define the nullptr for forward-compatibility
57  * and better readibility */
58 #if __cplusplus < 201103L
59 #define nullptr 0
60 #endif
61 
62 #ifndef NDEBUG
63 /* Helper function to check whether we want to seen debugging output */
is_debug_enabled()64 static inline bool is_debug_enabled ()
65 {
66    static int debug_enabled = -1;
67    if (debug_enabled < 0)
68       debug_enabled = env_var_as_boolean("GLSL_TO_TGSI_RENAME_DEBUG", false);
69    return debug_enabled > 0;
70 }
71 #define RENAME_DEBUG(X) if (is_debug_enabled()) do { X; } while (false);
72 #else
73 #define RENAME_DEBUG(X)
74 #endif
75 
76 namespace {
77 
78 enum prog_scope_type {
79    outer_scope,           /* Outer program scope */
80    loop_body,             /* Inside a loop */
81    if_branch,             /* Inside if branch */
82    else_branch,           /* Inside else branch */
83    switch_body,           /* Inside switch statmenet */
84    switch_case_branch,    /* Inside switch case statmenet */
85    switch_default_branch, /* Inside switch default statmenet */
86    undefined_scope
87 };
88 
89 class prog_scope {
90 public:
91    prog_scope(prog_scope *parent, prog_scope_type type, int id,
92               int depth, int begin);
93 
94    prog_scope_type type() const;
95    prog_scope *parent() const;
96    int nesting_depth() const;
97    int id() const;
98    int end() const;
99    int begin() const;
100    int loop_break_line() const;
101 
102    const prog_scope *in_ifelse_scope() const;
103    const prog_scope *in_switchcase_scope() const;
104    const prog_scope *innermost_loop() const;
105    const prog_scope *outermost_loop() const;
106    const prog_scope *enclosing_conditional() const;
107 
108    bool is_loop() const;
109    bool is_in_loop() const;
110    bool is_conditional() const;
111 
112    bool break_is_for_switchcase() const;
113    bool contains_range_of(const prog_scope& other) const;
114 
115    void set_end(int end);
116    void set_loop_break_line(int line);
117 
118 private:
119    prog_scope_type scope_type;
120    int scope_id;
121    int scope_nesting_depth;
122    int scope_begin;
123    int scope_end;
124    int break_loop_line;
125    prog_scope *parent_scope;
126 };
127 
128 /* Some storage class to encapsulate the prog_scope (de-)allocations */
129 class prog_scope_storage {
130 public:
131    prog_scope_storage(void *mem_ctx, int n);
132    ~prog_scope_storage();
133    prog_scope * create(prog_scope *p, prog_scope_type type, int id,
134                        int lvl, int s_begin);
135 private:
136    void *mem_ctx;
137    int current_slot;
138    prog_scope *storage;
139 };
140 
141 class temp_comp_access {
142 public:
143    temp_comp_access();
144    void record_read(int line, prog_scope *scope);
145    void record_write(int line, prog_scope *scope);
146    lifetime get_required_lifetime();
147 private:
148    void propagate_lifetime_to_dominant_write_scope();
149 
150    prog_scope *last_read_scope;
151    prog_scope *first_read_scope;
152    prog_scope *first_write_scope;
153    int first_write;
154    int last_read;
155    int last_write;
156    int first_read;
157    bool keep_for_full_loop;
158 };
159 
160 class temp_access {
161 public:
162    temp_access();
163    void record_read(int line, prog_scope *scope, int swizzle);
164    void record_write(int line, prog_scope *scope, int writemask);
165    lifetime get_required_lifetime();
166 private:
167    void update_access_mask(int mask);
168 
169    temp_comp_access comp[4];
170    int access_mask;
171    bool needs_component_tracking;
172 };
173 
prog_scope_storage(void * mc,int n)174 prog_scope_storage::prog_scope_storage(void *mc, int n):
175    mem_ctx(mc),
176    current_slot(0)
177 {
178    storage = ralloc_array(mem_ctx, prog_scope, n);
179 }
180 
~prog_scope_storage()181 prog_scope_storage::~prog_scope_storage()
182 {
183    ralloc_free(storage);
184 }
185 
186 prog_scope*
create(prog_scope * p,prog_scope_type type,int id,int lvl,int s_begin)187 prog_scope_storage::create(prog_scope *p, prog_scope_type type, int id,
188                            int lvl, int s_begin)
189 {
190    storage[current_slot] = prog_scope(p, type, id, lvl, s_begin);
191    return &storage[current_slot++];
192 }
193 
prog_scope(prog_scope * parent,prog_scope_type type,int id,int depth,int scope_begin)194 prog_scope::prog_scope(prog_scope *parent, prog_scope_type type, int id,
195                        int depth, int scope_begin):
196    scope_type(type),
197    scope_id(id),
198    scope_nesting_depth(depth),
199    scope_begin(scope_begin),
200    scope_end(-1),
201    break_loop_line(numeric_limits<int>::max()),
202    parent_scope(parent)
203 {
204 }
205 
type() const206 prog_scope_type prog_scope::type() const
207 {
208    return scope_type;
209 }
210 
parent() const211 prog_scope *prog_scope::parent() const
212 {
213    return parent_scope;
214 }
215 
nesting_depth() const216 int prog_scope::nesting_depth() const
217 {
218    return scope_nesting_depth;
219 }
220 
is_loop() const221 bool prog_scope::is_loop() const
222 {
223    return (scope_type == loop_body);
224 }
225 
is_in_loop() const226 bool prog_scope::is_in_loop() const
227 {
228    if (scope_type == loop_body)
229       return true;
230 
231    if (parent_scope)
232       return parent_scope->is_in_loop();
233 
234    return false;
235 }
236 
innermost_loop() const237 const prog_scope *prog_scope::innermost_loop() const
238 {
239    if (scope_type == loop_body)
240       return this;
241 
242    if (parent_scope)
243       return parent_scope->innermost_loop();
244 
245    return nullptr;
246 }
247 
outermost_loop() const248 const prog_scope *prog_scope::outermost_loop() const
249 {
250    const prog_scope *loop = nullptr;
251    const prog_scope *p = this;
252 
253    do {
254       if (p->type() == loop_body)
255          loop = p;
256       p = p->parent();
257    } while (p);
258 
259    return loop;
260 }
261 
enclosing_conditional() const262 const prog_scope *prog_scope::enclosing_conditional() const
263 {
264    if (is_conditional())
265       return this;
266 
267    if (parent_scope)
268       return parent_scope->enclosing_conditional();
269 
270    return nullptr;
271 }
272 
contains_range_of(const prog_scope & other) const273 bool prog_scope::contains_range_of(const prog_scope& other) const
274 {
275    return (begin() <= other.begin()) && (end() >= other.end());
276 }
277 
is_conditional() const278 bool prog_scope::is_conditional() const
279 {
280    return scope_type == if_branch ||
281          scope_type == else_branch ||
282          scope_type == switch_case_branch ||
283          scope_type == switch_default_branch;
284 }
285 
in_ifelse_scope() const286 const prog_scope *prog_scope::in_ifelse_scope() const
287 {
288    if (scope_type == if_branch ||
289        scope_type == else_branch)
290       return this;
291 
292    if (parent_scope)
293       return parent_scope->in_ifelse_scope();
294 
295    return nullptr;
296 }
297 
in_switchcase_scope() const298 const prog_scope *prog_scope::in_switchcase_scope() const
299 {
300    if (scope_type == switch_case_branch ||
301        scope_type == switch_default_branch)
302       return this;
303 
304    if (parent_scope)
305       return parent_scope->in_switchcase_scope();
306 
307    return nullptr;
308 }
309 
break_is_for_switchcase() const310 bool prog_scope::break_is_for_switchcase() const
311 {
312    if (scope_type == loop_body)
313       return false;
314 
315    if (scope_type == switch_case_branch ||
316        scope_type == switch_default_branch ||
317        scope_type == switch_body)
318       return true;
319 
320    if (parent_scope)
321       return parent_scope->break_is_for_switchcase();
322 
323    return false;
324 }
325 
id() const326 int prog_scope::id() const
327 {
328    return scope_id;
329 }
330 
begin() const331 int prog_scope::begin() const
332 {
333    return scope_begin;
334 }
335 
end() const336 int prog_scope::end() const
337 {
338    return scope_end;
339 }
340 
set_end(int end)341 void prog_scope::set_end(int end)
342 {
343    if (scope_end == -1)
344       scope_end = end;
345 }
346 
set_loop_break_line(int line)347 void prog_scope::set_loop_break_line(int line)
348 {
349    if (scope_type == loop_body) {
350       break_loop_line = MIN2(break_loop_line, line);
351    } else {
352       if (parent_scope)
353          parent()->set_loop_break_line(line);
354    }
355 }
356 
loop_break_line() const357 int prog_scope::loop_break_line() const
358 {
359    return break_loop_line;
360 }
361 
temp_access()362 temp_access::temp_access():
363    access_mask(0),
364    needs_component_tracking(false)
365 {
366 }
367 
update_access_mask(int mask)368 void temp_access::update_access_mask(int mask)
369 {
370    if (access_mask && access_mask != mask)
371       needs_component_tracking = true;
372    access_mask |= mask;
373 }
374 
record_write(int line,prog_scope * scope,int writemask)375 void temp_access::record_write(int line, prog_scope *scope, int writemask)
376 {
377    update_access_mask(writemask);
378 
379    if (writemask & WRITEMASK_X)
380       comp[0].record_write(line, scope);
381    if (writemask & WRITEMASK_Y)
382       comp[1].record_write(line, scope);
383    if (writemask & WRITEMASK_Z)
384       comp[2].record_write(line, scope);
385    if (writemask & WRITEMASK_W)
386       comp[3].record_write(line, scope);
387 }
388 
record_read(int line,prog_scope * scope,int swizzle)389 void temp_access::record_read(int line, prog_scope *scope, int swizzle)
390 {
391    int readmask = 0;
392    for (int idx = 0; idx < 4; ++idx) {
393       int swz = GET_SWZ(swizzle, idx);
394       readmask |= (1 << swz) & 0xF;
395    }
396    update_access_mask(readmask);
397 
398    if (readmask & WRITEMASK_X)
399       comp[0].record_read(line, scope);
400    if (readmask & WRITEMASK_Y)
401       comp[1].record_read(line, scope);
402    if (readmask & WRITEMASK_Z)
403       comp[2].record_read(line, scope);
404    if (readmask & WRITEMASK_W)
405       comp[3].record_read(line, scope);
406 }
407 
make_lifetime(int b,int e)408 inline static lifetime make_lifetime(int b, int e)
409 {
410    lifetime lt;
411    lt.begin = b;
412    lt.end = e;
413    return lt;
414 }
415 
get_required_lifetime()416 lifetime temp_access::get_required_lifetime()
417 {
418    lifetime result = make_lifetime(-1, -1);
419 
420    unsigned mask = access_mask;
421    while (mask) {
422       unsigned chan = u_bit_scan(&mask);
423       lifetime lt = comp[chan].get_required_lifetime();
424 
425       if (lt.begin >= 0) {
426          if ((result.begin < 0) || (result.begin > lt.begin))
427             result.begin = lt.begin;
428       }
429 
430       if (lt.end > result.end)
431          result.end = lt.end;
432 
433       if (!needs_component_tracking)
434          break;
435    }
436    return result;
437 }
438 
temp_comp_access()439 temp_comp_access::temp_comp_access():
440    last_read_scope(nullptr),
441    first_read_scope(nullptr),
442    first_write_scope(nullptr),
443    first_write(-1),
444    last_read(-1),
445    last_write(-1),
446    first_read(numeric_limits<int>::max())
447 {
448 }
449 
record_read(int line,prog_scope * scope)450 void temp_comp_access::record_read(int line, prog_scope *scope)
451 {
452    last_read_scope = scope;
453    last_read = line;
454 
455    if (first_read > line) {
456       first_read = line;
457       first_read_scope = scope;
458    }
459 }
460 
record_write(int line,prog_scope * scope)461 void temp_comp_access::record_write(int line, prog_scope *scope)
462 {
463    last_write = line;
464 
465    if (first_write < 0) {
466       first_write = line;
467       first_write_scope = scope;
468    }
469 }
470 
propagate_lifetime_to_dominant_write_scope()471 void temp_comp_access::propagate_lifetime_to_dominant_write_scope()
472 {
473    first_write = first_write_scope->begin();
474    int lr = first_write_scope->end();
475 
476    if (last_read < lr)
477       last_read = lr;
478 }
479 
get_required_lifetime()480 lifetime temp_comp_access::get_required_lifetime()
481 {
482    bool keep_for_full_loop = false;
483 
484    /* This register component is not used at all, or only read,
485     * mark it as unused and ignore it when renaming.
486     * glsl_to_tgsi_visitor::renumber_registers will take care of
487     * eliminating registers that are not written to.
488     */
489    if (last_write < 0)
490       return make_lifetime(-1, -1);
491 
492    assert(first_write_scope);
493 
494    /* Only written to, just make sure the register component is not
495     * reused in the range it is used to write to
496     */
497    if (!last_read_scope)
498       return make_lifetime(first_write, last_write + 1);
499 
500    const prog_scope *enclosing_scope_first_read = first_read_scope;
501    const prog_scope *enclosing_scope_first_write = first_write_scope;
502 
503    /* We read before writing in a loop
504     * hence the value must survive the loops
505     */
506    if ((first_read <= first_write) &&
507        first_read_scope->is_in_loop()) {
508       keep_for_full_loop = true;
509       enclosing_scope_first_read = first_read_scope->outermost_loop();
510    }
511 
512    /* A conditional write within a nested loop must survive
513     * the outermost loop, but only if it is read outside
514     * the condition scope where we write.
515     */
516    const prog_scope *conditional = enclosing_scope_first_write->enclosing_conditional();
517    if (conditional && conditional->is_in_loop() &&
518        !conditional->contains_range_of(*last_read_scope)) {
519       keep_for_full_loop = true;
520       enclosing_scope_first_write = conditional->outermost_loop();
521    }
522 
523    /* Evaluate the scope that is shared by all: required first write scope,
524     * required first read before write scope, and last read scope.
525     */
526    const prog_scope *enclosing_scope = enclosing_scope_first_read;
527    if (enclosing_scope_first_write->contains_range_of(*enclosing_scope))
528       enclosing_scope = enclosing_scope_first_write;
529 
530    if (last_read_scope->contains_range_of(*enclosing_scope))
531       enclosing_scope = last_read_scope;
532 
533    while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) ||
534           !enclosing_scope->contains_range_of(*last_read_scope)) {
535       enclosing_scope = enclosing_scope->parent();
536       assert(enclosing_scope);
537    }
538 
539    /* Propagate the last read scope to the target scope */
540    while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) {
541       /* If the read is in a loop and we have to move up the scope we need to
542        * extend the life time to the end of this current loop because at this
543        * point we don't know whether the component was written before
544        * un-conditionally in the same loop.
545        */
546       if (last_read_scope->is_loop())
547          last_read = last_read_scope->end();
548 
549       last_read_scope = last_read_scope->parent();
550    }
551 
552    /* If the variable has to be kept for the whole loop, and we
553     * are currently in a loop, then propagate the life time.
554     */
555    if (keep_for_full_loop && first_write_scope->is_loop())
556       propagate_lifetime_to_dominant_write_scope();
557 
558    /* Propagate the first_dominant_write scope to the target scope */
559    while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) {
560       /* Propagate lifetime if there was a break in a loop and the write was
561        * after the break inside that loop. Note, that this is only needed if
562        * we move up in the scopes.
563        */
564       if (first_write_scope->loop_break_line() < first_write) {
565          keep_for_full_loop = true;
566          propagate_lifetime_to_dominant_write_scope();
567       }
568 
569       first_write_scope = first_write_scope->parent();
570 
571       /* Propagte lifetime if we are now in a loop */
572       if (keep_for_full_loop && first_write_scope->is_loop())
573           propagate_lifetime_to_dominant_write_scope();
574    }
575 
576    /* The last write past the last read is dead code, but we have to
577     * ensure that the component is not reused too early, hence extend the
578     * lifetime past the last write.
579     */
580    if (last_write >= last_read)
581       last_read = last_write + 1;
582 
583    /* Here we are at the same scope, all is resolved */
584    return make_lifetime(first_write, last_read);
585 }
586 
587 /* Helper class for sorting and searching the registers based
588  * on life times. */
589 class access_record {
590 public:
591    int begin;
592    int end;
593    int reg;
594    bool erase;
595 
operator <(const access_record & rhs) const596    bool operator < (const access_record& rhs) const {
597       return begin < rhs.begin;
598    }
599 };
600 
601 }
602 
603 #ifndef NDEBUG
604 /* Function used for debugging. */
605 static void dump_instruction(int line, prog_scope *scope,
606                              const glsl_to_tgsi_instruction& inst);
607 #endif
608 
609 /* Scan the program and estimate the required register life times.
610  * The array lifetimes must be pre-allocated
611  */
612 bool
get_temp_registers_required_lifetimes(void * mem_ctx,exec_list * instructions,int ntemps,struct lifetime * lifetimes)613 get_temp_registers_required_lifetimes(void *mem_ctx, exec_list *instructions,
614                                       int ntemps, struct lifetime *lifetimes)
615 {
616    int line = 0;
617    int loop_id = 0;
618    int if_id = 0;
619    int switch_id = 0;
620    bool is_at_end = false;
621    bool ok = true;
622    int n_scopes = 1;
623 
624    /* Count scopes to allocate the needed space without the need for
625     * re-allocation
626     */
627    foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) {
628       if (inst->op == TGSI_OPCODE_BGNLOOP ||
629           inst->op == TGSI_OPCODE_SWITCH ||
630           inst->op == TGSI_OPCODE_CASE ||
631           inst->op == TGSI_OPCODE_IF ||
632           inst->op == TGSI_OPCODE_UIF ||
633           inst->op == TGSI_OPCODE_ELSE ||
634           inst->op == TGSI_OPCODE_DEFAULT)
635          ++n_scopes;
636    }
637 
638    prog_scope_storage scopes(mem_ctx, n_scopes);
639    temp_access *acc = new temp_access[ntemps];
640 
641    prog_scope *cur_scope = scopes.create(nullptr, outer_scope, 0, 0, line);
642 
643    RENAME_DEBUG(cerr << "========= Begin shader ============\n");
644 
645    foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) {
646       if (is_at_end) {
647          assert(!"GLSL_TO_TGSI: shader has instructions past end marker");
648          break;
649       }
650 
651       RENAME_DEBUG(dump_instruction(line, cur_scope, *inst));
652 
653       switch (inst->op) {
654       case TGSI_OPCODE_BGNLOOP: {
655          cur_scope = scopes.create(cur_scope, loop_body, loop_id++,
656                                    cur_scope->nesting_depth() + 1, line);
657          break;
658       }
659       case TGSI_OPCODE_ENDLOOP: {
660          cur_scope->set_end(line);
661          cur_scope = cur_scope->parent();
662          assert(cur_scope);
663          break;
664       }
665       case TGSI_OPCODE_IF:
666       case TGSI_OPCODE_UIF: {
667          assert(num_inst_src_regs(inst) == 1);
668          const st_src_reg& src = inst->src[0];
669          if (src.file == PROGRAM_TEMPORARY)
670             acc[src.index].record_read(line, cur_scope, src.swizzle);
671          cur_scope = scopes.create(cur_scope, if_branch, if_id++,
672                                    cur_scope->nesting_depth() + 1, line + 1);
673          break;
674       }
675       case TGSI_OPCODE_ELSE: {
676          assert(cur_scope->type() == if_branch);
677          cur_scope->set_end(line - 1);
678          cur_scope = scopes.create(cur_scope->parent(), else_branch,
679                                    cur_scope->id(), cur_scope->nesting_depth(),
680                                    line + 1);
681          break;
682       }
683       case TGSI_OPCODE_END: {
684          cur_scope->set_end(line);
685          is_at_end = true;
686          break;
687       }
688       case TGSI_OPCODE_ENDIF: {
689          cur_scope->set_end(line - 1);
690          cur_scope = cur_scope->parent();
691          assert(cur_scope);
692          break;
693       }
694       case TGSI_OPCODE_SWITCH: {
695          assert(num_inst_src_regs(inst) == 1);
696          const st_src_reg& src = inst->src[0];
697          prog_scope *scope = scopes.create(cur_scope, switch_body, switch_id++,
698                                            cur_scope->nesting_depth() + 1, line);
699          /* We record the read only for the SWITCH statement itself, like it
700           * is used by the only consumer of TGSI_OPCODE_SWITCH in tgsi_exec.c.
701           */
702          if (src.file == PROGRAM_TEMPORARY)
703             acc[src.index].record_read(line, cur_scope, src.swizzle);
704          cur_scope = scope;
705          break;
706       }
707       case TGSI_OPCODE_ENDSWITCH: {
708          cur_scope->set_end(line - 1);
709          /* Remove the case level, it might not have been
710           * closed with a break.
711           */
712          if (cur_scope->type() != switch_body)
713             cur_scope = cur_scope->parent();
714 
715          cur_scope = cur_scope->parent();
716          assert(cur_scope);
717          break;
718       }
719       case TGSI_OPCODE_CASE: {
720          /* Take care of tracking the registers. */
721          prog_scope *switch_scope = cur_scope->type() == switch_body ?
722                                        cur_scope : cur_scope->parent();
723 
724          assert(num_inst_src_regs(inst) == 1);
725          const st_src_reg& src = inst->src[0];
726          if (src.file == PROGRAM_TEMPORARY)
727             acc[src.index].record_read(line, switch_scope, src.swizzle);
728 
729          /* Fall through to allocate the scope. */
730       }
731       case TGSI_OPCODE_DEFAULT: {
732          prog_scope_type t = inst->op == TGSI_OPCODE_CASE ? switch_case_branch
733                                                        : switch_default_branch;
734          prog_scope *switch_scope = (cur_scope->type() == switch_body) ?
735             cur_scope : cur_scope->parent();
736          assert(switch_scope->type() == switch_body);
737          prog_scope *scope = scopes.create(switch_scope, t,
738                                            switch_scope->id(),
739                                            switch_scope->nesting_depth() + 1,
740                                            line);
741          /* Previous case falls through, so scope was not yet closed. */
742          if ((cur_scope != switch_scope) && (cur_scope->end() == -1))
743             cur_scope->set_end(line - 1);
744          cur_scope = scope;
745          break;
746       }
747       case TGSI_OPCODE_BRK: {
748          if (cur_scope->break_is_for_switchcase()) {
749             cur_scope->set_end(line - 1);
750          } else {
751             cur_scope->set_loop_break_line(line);
752          }
753          break;
754       }
755       case TGSI_OPCODE_CAL:
756       case TGSI_OPCODE_RET:
757          /* These opcodes are not supported and if a subroutine would
758           * be called in a shader, then the lifetime tracking would have
759           * to follow that call to see which registers are used there.
760           * Since this is not done, we have to bail out here and signal
761           * that no register merge will take place.
762           */
763          ok = false;
764          goto out;
765       default: {
766          for (unsigned j = 0; j < num_inst_src_regs(inst); j++) {
767             const st_src_reg& src = inst->src[j];
768             if (src.file == PROGRAM_TEMPORARY)
769                acc[src.index].record_read(line, cur_scope, src.swizzle);
770          }
771          for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) {
772             const st_src_reg& src = inst->tex_offsets[j];
773             if (src.file == PROGRAM_TEMPORARY)
774                acc[src.index].record_read(line, cur_scope, src.swizzle);
775          }
776          for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) {
777             const st_dst_reg& dst = inst->dst[j];
778             if (dst.file == PROGRAM_TEMPORARY)
779                acc[dst.index].record_write(line, cur_scope, dst.writemask);
780          }
781       }
782       }
783       ++line;
784    }
785 
786    RENAME_DEBUG(cerr << "==================================\n\n");
787 
788    /* Make sure last scope is closed, even though no
789     * TGSI_OPCODE_END was given.
790     */
791    if (cur_scope->end() < 0)
792       cur_scope->set_end(line - 1);
793 
794    RENAME_DEBUG(cerr << "========= lifetimes ==============\n");
795    for(int i = 0; i < ntemps; ++i) {
796       RENAME_DEBUG(cerr << setw(4) << i);
797       lifetimes[i] = acc[i].get_required_lifetime();
798       RENAME_DEBUG(cerr << ": [" << lifetimes[i].begin << ", "
799                         << lifetimes[i].end << "]\n");
800    }
801    RENAME_DEBUG(cerr << "==================================\n\n");
802 
803 out:
804    delete[] acc;
805    return ok;
806 }
807 
808 /* Find the next register between [start, end) that has a life time starting
809  * at or after bound by using a binary search.
810  * start points at the beginning of the search range,
811  * end points at the element past the end of the search range, and
812  * the array comprising [start, end) must be sorted in ascending order.
813  */
814 static access_record*
find_next_rename(access_record * start,access_record * end,int bound)815 find_next_rename(access_record* start, access_record* end, int bound)
816 {
817    int delta = (end - start);
818 
819    while (delta > 0) {
820       int half = delta >> 1;
821       access_record* middle = start + half;
822 
823       if (bound <= middle->begin) {
824          delta = half;
825       } else {
826          start = middle;
827          ++start;
828          delta -= half + 1;
829       }
830    }
831 
832    return start;
833 }
834 
835 #ifndef USE_STL_SORT
access_record_compare(const void * a,const void * b)836 static int access_record_compare (const void *a, const void *b) {
837    const access_record *aa = static_cast<const access_record*>(a);
838    const access_record *bb = static_cast<const access_record*>(b);
839    return aa->begin < bb->begin ? -1 : (aa->begin > bb->begin ? 1 : 0);
840 }
841 #endif
842 
843 /* This functions evaluates the register merges by using a binary
844  * search to find suitable merge candidates. */
get_temp_registers_remapping(void * mem_ctx,int ntemps,const struct lifetime * lifetimes,struct rename_reg_pair * result)845 void get_temp_registers_remapping(void *mem_ctx, int ntemps,
846                                   const struct lifetime* lifetimes,
847                                   struct rename_reg_pair *result)
848 {
849    access_record *reg_access = ralloc_array(mem_ctx, access_record, ntemps);
850 
851    int used_temps = 0;
852    for (int i = 0; i < ntemps; ++i) {
853       if (lifetimes[i].begin >= 0) {
854          reg_access[used_temps].begin = lifetimes[i].begin;
855          reg_access[used_temps].end = lifetimes[i].end;
856          reg_access[used_temps].reg = i;
857          reg_access[used_temps].erase = false;
858          ++used_temps;
859       }
860    }
861 
862 #ifdef USE_STL_SORT
863    std::sort(reg_access, reg_access + used_temps);
864 #else
865    std::qsort(reg_access, used_temps, sizeof(access_record), access_record_compare);
866 #endif
867 
868    access_record *trgt = reg_access;
869    access_record *reg_access_end = reg_access + used_temps;
870    access_record *first_erase = reg_access_end;
871    access_record *search_start = trgt + 1;
872 
873    while (trgt != reg_access_end) {
874       access_record *src = find_next_rename(search_start, reg_access_end,
875                                             trgt->end);
876       if (src != reg_access_end) {
877          result[src->reg].new_reg = trgt->reg;
878          result[src->reg].valid = true;
879          trgt->end = src->end;
880 
881          /* Since we only search forward, don't remove the renamed
882           * register just now, only mark it. */
883          src->erase = true;
884 
885          if (first_erase == reg_access_end)
886             first_erase = src;
887 
888          search_start = src + 1;
889       } else {
890          /* Moving to the next target register it is time to remove
891           * the already merged registers from the search range */
892          if (first_erase != reg_access_end) {
893             access_record *outp = first_erase;
894             access_record *inp = first_erase + 1;
895 
896             while (inp != reg_access_end) {
897                if (!inp->erase)
898                   *outp++ = *inp;
899                ++inp;
900             }
901 
902             reg_access_end = outp;
903             first_erase = reg_access_end;
904          }
905          ++trgt;
906          search_start = trgt + 1;
907       }
908    }
909    ralloc_free(reg_access);
910 }
911 
912 /* Code below used for debugging */
913 #ifndef NDEBUG
914 static const char swizzle_txt[] = "xyzw";
915 
916 static const char *tgsi_file_names[PROGRAM_FILE_MAX] =  {
917    "TEMP",  "ARRAY",   "IN", "OUT", "STATE", "CONST",
918    "UNIFORM",  "WO", "ADDR", "SAMPLER",  "SV", "UNDEF",
919    "IMM", "BUF",  "MEM",  "IMAGE"
920 };
921 
922 static
dump_instruction(int line,prog_scope * scope,const glsl_to_tgsi_instruction & inst)923 void dump_instruction(int line, prog_scope *scope,
924                       const glsl_to_tgsi_instruction& inst)
925 {
926    const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst.op);
927 
928    int indent = scope->nesting_depth();
929    if ((scope->type() == switch_case_branch ||
930         scope->type() == switch_default_branch) &&
931        (info->opcode == TGSI_OPCODE_CASE ||
932         info->opcode == TGSI_OPCODE_DEFAULT))
933       --indent;
934 
935    if (info->opcode == TGSI_OPCODE_ENDIF ||
936        info->opcode == TGSI_OPCODE_ELSE ||
937        info->opcode == TGSI_OPCODE_ENDLOOP ||
938        info->opcode == TGSI_OPCODE_ENDSWITCH)
939       --indent;
940 
941    cerr << setw(4) << line << ": ";
942    for (int i = 0; i < indent; ++i)
943       cerr << "    ";
944    cerr << tgsi_get_opcode_name(info->opcode) << " ";
945 
946    bool has_operators = false;
947    for (unsigned j = 0; j < num_inst_dst_regs(&inst); j++) {
948       has_operators = true;
949       if (j > 0)
950          cerr << ", ";
951 
952       const st_dst_reg& dst = inst.dst[j];
953       cerr << tgsi_file_names[dst.file];
954 
955       if (dst.file == PROGRAM_ARRAY)
956          cerr << "(" << dst.array_id << ")";
957 
958       cerr << "[" << dst.index << "]";
959 
960       if (dst.writemask != TGSI_WRITEMASK_XYZW) {
961          cerr << ".";
962          if (dst.writemask & TGSI_WRITEMASK_X) cerr << "x";
963          if (dst.writemask & TGSI_WRITEMASK_Y) cerr << "y";
964          if (dst.writemask & TGSI_WRITEMASK_Z) cerr << "z";
965          if (dst.writemask & TGSI_WRITEMASK_W) cerr << "w";
966       }
967    }
968    if (has_operators)
969       cerr << " := ";
970 
971    for (unsigned j = 0; j < num_inst_src_regs(&inst); j++) {
972       if (j > 0)
973          cerr << ", ";
974 
975       const st_src_reg& src = inst.src[j];
976       cerr << tgsi_file_names[src.file]
977            << "[" << src.index << "]";
978       if (src.swizzle != SWIZZLE_XYZW) {
979          cerr << ".";
980          for (int idx = 0; idx < 4; ++idx) {
981             int swz = GET_SWZ(src.swizzle, idx);
982             if (swz < 4) {
983                cerr << swizzle_txt[swz];
984             }
985          }
986       }
987    }
988 
989    if (inst.tex_offset_num_offset > 0) {
990       cerr << ", TEXOFS: ";
991       for (unsigned j = 0; j < inst.tex_offset_num_offset; j++) {
992          if (j > 0)
993             cerr << ", ";
994 
995          const st_src_reg& src = inst.tex_offsets[j];
996          cerr << tgsi_file_names[src.file]
997                << "[" << src.index << "]";
998          if (src.swizzle != SWIZZLE_XYZW) {
999             cerr << ".";
1000             for (int idx = 0; idx < 4; ++idx) {
1001                int swz = GET_SWZ(src.swizzle, idx);
1002                if (swz < 4) {
1003                   cerr << swizzle_txt[swz];
1004                }
1005             }
1006          }
1007       }
1008    }
1009    cerr << "\n";
1010 }
1011 #endif
1012