/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include <limits.h>

#include "gpir.h"

/* Register-sensitive scheduling algorithm from the paper:
 * "Register-Sensitive Selection, Duplication, and Sequencing of Instructions"
 * Authors: Vivek Sarkar, Mauricio J. Serrano, Barbara B. Simons
 */
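
/* In short (an informal summary of this file, not taken from the paper): each
 * node gets a Sethi-Ullman-style register-pressure label computed bottom-up
 * over the dependency DAG (schedule_calc_sched_info), and each block is then
 * list-scheduled backwards from its roots using those labels, so that the
 * number of simultaneously live values stays small.
 */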

/* qsort comparator: sorts floats in ascending order */
static int cmp_float(const void *a, const void *b)
{
   const float *fa = (const float *) a;
   const float *fb = (const float *) b;
   return (*fa > *fb) - (*fa < *fb);
}

static void schedule_calc_sched_info(gpir_node *node)
{
   int n = 0;
   float extra_reg = 1.0f;

   /* update all children's sched info */
   gpir_node_foreach_pred(node, dep) {
      gpir_node *pred = dep->pred;

      if (pred->rsched.reg_pressure < 0)
         schedule_calc_sched_info(pred);

      int est = pred->rsched.est + 1;
      if (node->rsched.est < est)
         node->rsched.est = est;

      unsigned len = list_length(&pred->succ_list);
      assert(len > 0);
      float reg_weight = 1.0f - 1.0f / len;
      if (extra_reg > reg_weight)
         extra_reg = reg_weight;

      n++;
   }

   /* leaf instr */
   if (!n) {
      node->rsched.reg_pressure = 0;
      return;
   }

   int i = 0;
   float reg[n];
   gpir_node_foreach_pred(node, dep) {
      gpir_node *pred = dep->pred;
      reg[i++] = pred->rsched.reg_pressure;
   }

   /* sort */
   qsort(reg, n, sizeof(reg[0]), cmp_float);

   for (i = 0; i < n; i++) {
      float pressure = reg[i] + n - (i + 1);
      if (pressure > node->rsched.reg_pressure)
         node->rsched.reg_pressure = pressure;
   }

   /* If every child of this node has multiple parents, then this node
    * needs an extra register to hold its own result. For example, with
    * n == 1 and a child that has more than one successor, it would be
    * unfair for the parent to get the same reg pressure as the child,
    * because two registers are actually needed in that case.
    *
    * We can't simply add a full register to reg_pressure, though,
    * because the last parent of a multi-successor child doesn't need
    * the extra register. For example, a node with a single
    * multi-successor child should end up with less reg pressure than
    * an instruction with two single-successor children (a small worked
    * example follows after this function).
    *
    * extra reg = min over all children of (1.0 - 1.0 / num successors)
    */
   node->rsched.reg_pressure += extra_reg;
}
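
/* A small worked example of the labeling above (illustrative only, the node
 * names are made up and not part of the original source). Assume the children
 * involved are already labeled with reg_pressure p:
 *
 *   Node P, one child C, where C has two successors:
 *      sorted reg[] = { p }        ->  max is p + 1 - 1 = p
 *      extra_reg    = 1.0 - 1.0/2  =   0.5
 *      P.reg_pressure = p + 0.5
 *
 *   Node Q, two single-successor children C1 and C2, both labeled p:
 *      sorted reg[] = { p, p }     ->  max is p + 2 - 1 = p + 1
 *      extra_reg    = 1.0 - 1.0/1  =   0.0
 *      Q.reg_pressure = p + 1
 *
 * So, as the comment above intends, the single multi-successor child case (P)
 * ends up with less reg pressure than the two-children case (Q).
 */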

static void schedule_insert_ready_list(struct list_head *ready_list,
                                       gpir_node *insert_node)
{
   struct list_head *insert_pos = ready_list;

   list_for_each_entry(gpir_node, node, ready_list, list) {
      if (gpir_op_infos[node->op].schedule_first) {
         continue;
      }

      if (gpir_op_infos[insert_node->op].schedule_first ||
          insert_node->rsched.parent_index < node->rsched.parent_index ||
          (insert_node->rsched.parent_index == node->rsched.parent_index &&
           (insert_node->rsched.reg_pressure < node->rsched.reg_pressure ||
            (insert_node->rsched.reg_pressure == node->rsched.reg_pressure &&
             (insert_node->rsched.est >= node->rsched.est))))) {
         insert_pos = &node->list;
         if (node == insert_node)
            return;
         break;
      }
   }

   list_del(&insert_node->list);
   list_addtail(&insert_node->list, insert_pos);
}
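
/* The resulting ready-list order (a reading of schedule_insert_ready_list
 * above, not an original comment): nodes whose op is marked schedule_first
 * stay at the front; the remaining nodes are kept sorted by ascending
 * parent_index, with ties broken by ascending reg_pressure and then by
 * descending est. schedule_ready_list below always pops the head of this list.
 */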

static void schedule_ready_list(gpir_block *block, struct list_head *ready_list)
{
   if (list_is_empty(ready_list))
      return;

   gpir_node *node = list_first_entry(ready_list, gpir_node, list);
   list_del(&node->list);

   /* schedule the node onto the block's node list */
   list_add(&node->list, &block->node_list);
   node->rsched.scheduled = true;
   block->rsched.node_index--;

   gpir_node_foreach_pred(node, dep) {
      gpir_node *pred = dep->pred;
      pred->rsched.parent_index = block->rsched.node_index;

      bool ready = true;
      gpir_node_foreach_succ(pred, dep) {
         gpir_node *succ = dep->succ;
         if (!succ->rsched.scheduled) {
            ready = false;
            break;
         }
      }
      /* all successors have been scheduled */
      if (ready)
         schedule_insert_ready_list(ready_list, pred);
   }

   schedule_ready_list(block, ready_list);
}
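
/* Note on ordering (a reading of the code above, not an original comment):
 * list_add() prepends each newly scheduled node to block->node_list, and a
 * node is only scheduled once all of its successors are, so when the recursion
 * finishes the block's node list reads in normal program order, with producers
 * ahead of their consumers.
 */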

static void schedule_block(gpir_block *block)
{
   /* move all nodes to a local node_list; block->node_list will
    * contain the schedule result */
   struct list_head node_list;
   list_replace(&block->node_list, &node_list);
   list_inithead(&block->node_list);

   /* step 2 & 3 */
   list_for_each_entry(gpir_node, node, &node_list, list) {
      if (gpir_node_is_root(node))
         schedule_calc_sched_info(node);
      block->rsched.node_index++;
   }

   /* step 4 */
   struct list_head ready_list;
   list_inithead(&ready_list);

   /* step 5 */
   list_for_each_entry_safe(gpir_node, node, &node_list, list) {
      if (gpir_node_is_root(node)) {
         node->rsched.parent_index = INT_MAX;
         schedule_insert_ready_list(&ready_list, node);
      }
   }

   /* step 6 */
   schedule_ready_list(block, &ready_list);
}

/* Due to how we translate from NIR, we never read a register written in the
 * same block (we just pass the node through instead), so we don't have to
 * worry about read-after-write dependencies. We do have to worry about
 * write-after-read though, so we add those dependencies now. For example in a
 * loop like this we need a dependency between the write and the read of i:
 *
 * i = ...
 * while (...) {
 *    ... = i;
 *    i = i + 1;
 * }
 */
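
/* Concretely (a reading of add_false_dependencies below, not an original
 * comment): for the loop above, the store node for "i = i + 1" gains a
 * GPIR_DEP_WRITE_AFTER_READ dependency on the load node for "... = i", which
 * keeps the write of i scheduled after its read within the block.
 */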

static void add_false_dependencies(gpir_compiler *comp)
{
   /* Make sure we allocate this only once, in case there are many values and
    * many blocks.
    */
   gpir_node **last_written = calloc(comp->cur_reg, sizeof(gpir_node *));

   /* Walk each block's nodes in reverse, so that last_written[r] holds the
    * nearest store to register r that follows the current node in program
    * order. */
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
         if (node->op == gpir_op_load_reg) {
            gpir_load_node *load = gpir_node_to_load(node);
            gpir_node *store = last_written[load->reg->index];
            if (store && store->block == block) {
               gpir_node_add_dep(store, node, GPIR_DEP_WRITE_AFTER_READ);
            }
         } else if (node->op == gpir_op_store_reg) {
            gpir_store_node *store = gpir_node_to_store(node);
            last_written[store->reg->index] = node;
         }
      }
   }

   free(last_written);
}

bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp)
{
   add_false_dependencies(comp);

   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      block->rsched.node_index = 0;
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         node->rsched.reg_pressure = -1;
         node->rsched.est = 0;
         node->rsched.scheduled = false;
      }
   }

   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      schedule_block(block);
   }

   gpir_debug("after reduce scheduler\n");
   gpir_node_print_prog_seq(comp);
   return true;
}