/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include <limits.h>

#include "gpir.h"

/* Register-sensitive scheduling algorithm from the paper:
 * "Register-Sensitive Selection, Duplication, and Sequencing of Instructions"
 * Authors: Vivek Sarkar, Mauricio J. Serrano, Barbara B. Simons
 */
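
/* This pass reorders the nodes within each basic block to reduce the
 * number of simultaneously live values, so that the later instruction
 * scheduler sees lower register pressure.
 */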

static int cmp_float(const void *a, const void *b)
{
   const float *fa = (const float *) a;
   const float *fb = (const float *) b;
   return (*fa > *fb) - (*fa < *fb);
}

static void schedule_calc_sched_info(gpir_node *node)
{
   int n = 0;
   float extra_reg = 1.0f;

   /* update the sched info of all children (predecessors) first */
   gpir_node_foreach_pred(node, dep) {
      gpir_node *pred = dep->pred;

      if (pred->rsched.reg_pressure < 0)
         schedule_calc_sched_info(pred);

      int est = pred->rsched.est + 1;
      if (node->rsched.est < est)
         node->rsched.est = est;

      float reg_weight = 1.0f - 1.0f / list_length(&pred->succ_list);
      if (extra_reg > reg_weight)
         extra_reg = reg_weight;

      n++;
   }

   /* leaf instr */
   if (!n) {
      node->rsched.reg_pressure = 0;
      return;
   }

   int i = 0;
   float reg[n];
   gpir_node_foreach_pred(node, dep) {
      gpir_node *pred = dep->pred;
      reg[i++] = pred->rsched.reg_pressure;
   }

   /* sort child register pressures in ascending order */
   qsort(reg, n, sizeof(reg[0]), cmp_float);

   for (i = 0; i < n; i++) {
      float pressure = reg[i] + n - (i + 1);
      if (pressure > node->rsched.reg_pressure)
         node->rsched.reg_pressure = pressure;
   }

   /* If all children of this node have multiple parents, then this
    * node needs an extra reg to store its result. For example, when
    * n == 1 and the child has more than one successor, it isn't fair
    * for the parent to have the same reg pressure as the child,
    * because two regs are really needed.
    *
    * But we can't add a full reg to the reg_pressure, because the
    * last parent of a multi-successor child doesn't need an extra
    * reg. For example, a node with a single multi-successor child
    * should have less reg pressure than a node with two
    * single-successor children.
    *
    * extra reg = min over all children of (1.0 - 1.0 / num successors)
    */
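   /* A worked example of the rules above: a node with two
    * single-successor children of pressure 0 gets
    * max(0 + 1, 0 + 0) = 1 from the loop above and extra_reg = 0,
    * i.e. pressure 1.0. A node whose only child has pressure 0 but
    * two successors gets 0 from the loop and extra_reg = 0.5, i.e.
    * pressure 0.5, less than the two-child case, as intended.
    */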
   node->rsched.reg_pressure += extra_reg;
}

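/* Keep the ready list sorted so that its head is always the preferred
 * next pick: nodes flagged schedule_first come before everything else,
 * then nodes with a smaller parent_index (i.e. whose parent was
 * scheduled more recently), then lower estimated register pressure,
 * then larger earliest start time.
 */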
static void schedule_insert_ready_list(struct list_head *ready_list,
                                       gpir_node *insert_node)
{
   struct list_head *insert_pos = ready_list;

   list_for_each_entry(gpir_node, node, ready_list, list) {
      if (gpir_op_infos[node->op].schedule_first) {
         continue;
      }

      if (gpir_op_infos[insert_node->op].schedule_first ||
          insert_node->rsched.parent_index < node->rsched.parent_index ||
          (insert_node->rsched.parent_index == node->rsched.parent_index &&
           (insert_node->rsched.reg_pressure < node->rsched.reg_pressure ||
            (insert_node->rsched.reg_pressure == node->rsched.reg_pressure &&
             (insert_node->rsched.est >= node->rsched.est))))) {
         insert_pos = &node->list;
         if (node == insert_node)
            return;
         break;
      }
   }

   list_del(&insert_node->list);
   list_addtail(&insert_node->list, insert_pos);
}

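/* Pick nodes in reverse program order: repeatedly take the head of the
 * ready list, prepend it to block->node_list, and make each of its
 * predecessors ready once all of that predecessor's successors have
 * been scheduled.
 */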
static void schedule_ready_list(gpir_block *block, struct list_head *ready_list)
{
   if (list_is_empty(ready_list))
      return;

   gpir_node *node = list_first_entry(ready_list, gpir_node, list);
   list_del(&node->list);

   /* schedule the node to the block node list */
   list_add(&node->list, &block->node_list);
   node->rsched.scheduled = true;
   block->rsched.node_index--;

   gpir_node_foreach_pred(node, dep) {
      gpir_node *pred = dep->pred;
      pred->rsched.parent_index = block->rsched.node_index;

      bool ready = true;
      gpir_node_foreach_succ(pred, dep) {
         gpir_node *succ = dep->succ;
         if (!succ->rsched.scheduled) {
            ready = false;
            break;
         }
      }
      /* all successors have been scheduled */
      if (ready)
         schedule_insert_ready_list(ready_list, pred);
   }

   schedule_ready_list(block, ready_list);
}

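/* Schedule a single block: compute the scheduling info for every node,
 * seed the ready list with the root nodes (nodes with no successors),
 * then drain the ready list to produce the final node order.
 */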
static void schedule_block(gpir_block *block)
{
   /* move all nodes to a local list; block->node_list will
    * hold the schedule result */
   struct list_head node_list;
   list_replace(&block->node_list, &node_list);
   list_inithead(&block->node_list);

   /* step 2 & 3 */
   list_for_each_entry(gpir_node, node, &node_list, list) {
      if (gpir_node_is_root(node))
         schedule_calc_sched_info(node);
      block->rsched.node_index++;
   }

   /* step 4 */
   struct list_head ready_list;
   list_inithead(&ready_list);

   /* step 5 */
   list_for_each_entry_safe(gpir_node, node, &node_list, list) {
      if (gpir_node_is_root(node)) {
         node->rsched.parent_index = INT_MAX;
         schedule_insert_ready_list(&ready_list, node);
      }
   }

   /* step 6 */
   schedule_ready_list(block, &ready_list);
}

/* Due to how we translate from NIR, we never read a register written in the
 * same block (we just pass the node through instead), so we don't have to
 * worry about read-after-write dependencies. We do have to worry about
 * write-after-read though, so we add those dependencies now. For example in a
 * loop like this we need a dependency between the write and the read of i:
 *
 *    i = ...
 *    while (...) {
 *       ... = i;
 *       i = i + 1;
 *    }
 */

static void add_false_dependencies(gpir_compiler *comp)
{
   /* Make sure we allocate this only once, in case there are many values and
    * many blocks.
    */
   gpir_node **last_written = calloc(comp->cur_reg, sizeof(gpir_node *));

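   /* Walk each block backwards, so that when a load is reached,
    * last_written[] holds the next write to that register in program
    * order. The store->block check ensures we only add dependencies
    * between nodes of the same block, since scheduling is per-block.
    */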
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
         if (node->op == gpir_op_load_reg) {
            gpir_load_node *load = gpir_node_to_load(node);
            gpir_node *store = last_written[load->reg->index];
            if (store && store->block == block) {
               gpir_node_add_dep(store, node, GPIR_DEP_WRITE_AFTER_READ);
            }
         } else if (node->op == gpir_op_store_reg) {
            gpir_store_node *store = gpir_node_to_store(node);
            last_written[store->reg->index] = node;
         }
      }
   }

   free(last_written);
}

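/* Entry point: add the write-after-read dependencies, reset every
 * node's scheduling state, then schedule each block in turn.
 */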
bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp)
{
   add_false_dependencies(comp);

   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      block->rsched.node_index = 0;
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         node->rsched.reg_pressure = -1;
         node->rsched.est = 0;
         node->rsched.scheduled = false;
      }
   }

   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      schedule_block(block);
   }

   gpir_debug("after reduce scheduler\n");
   gpir_node_print_prog_seq(comp);
   return true;
}