/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include "util/ralloc.h"

#include "gpir.h"
#include "lima_context.h"

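/* Move constants into the uniform file. Dead const nodes (roots) are
 * deleted; the remaining ones are gathered into comp->prog->constant,
 * packed four to a vec4 uniform slot starting at comp->constant_base,
 * and each use is rewritten into a load_uniform of the matching
 * slot/component.
 */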
static bool gpir_lower_const(gpir_compiler *comp)
{
   int num_constant = 0;
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->op == gpir_op_const) {
            if (gpir_node_is_root(node))
               gpir_node_delete(node);
            else
               num_constant++;
         }
      }
   }

   if (num_constant) {
      union fi *constant = ralloc_array(comp->prog, union fi, num_constant);
      if (!constant)
         return false;

      comp->prog->constant = constant;
      comp->prog->constant_size = num_constant * sizeof(union fi);

      int index = 0;
      list_for_each_entry(gpir_block, block, &comp->block_list, list) {
         list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
            if (node->op == gpir_op_const) {
               gpir_const_node *c = gpir_node_to_const(node);

               if (!gpir_node_is_root(node)) {
                  gpir_load_node *load = gpir_node_create(block, gpir_op_load_uniform);
                  if (unlikely(!load))
                     return false;

                  load->index = comp->constant_base + (index >> 2);
                  load->component = index % 4;
                  constant[index++] = c->value;

                  gpir_node_replace_succ(&load->node, node);

                  list_addtail(&load->node.list, &node->list);

                  gpir_debug("lower const create uniform %d for const %d\n",
                             load->node.index, node->index);
               }

               gpir_node_delete(node);
            }
         }
      }
   }

   return true;
}

/* Duplicate a load node so that each of its successors gets its own copy;
 * the first successor keeps the original node.
 */
static bool gpir_lower_load(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->type == gpir_node_type_load) {
            gpir_load_node *load = gpir_node_to_load(node);

            bool first = true;
            gpir_node_foreach_succ_safe(node, dep) {
               gpir_node *succ = dep->succ;

               if (first) {
                  first = false;
                  continue;
               }

               gpir_node *new = gpir_node_create(succ->block, node->op);
               if (unlikely(!new))
                  return false;
               list_addtail(&new->list, &succ->list);

               gpir_debug("lower load create %d from %d for succ %d\n",
                          new->index, node->index, succ->index);

               gpir_load_node *nload = gpir_node_to_load(new);
               nload->index = load->index;
               nload->component = load->component;
               nload->reg = load->reg;

               gpir_node_replace_pred(dep, new);
               gpir_node_replace_child(succ, node, new);
            }
         }
      }
   }

   return true;
}

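/* Fold a neg node away by toggling negate modifiers: either the
 * destination-negate of its only ALU child, or the source-negate of
 * each ALU successor that supports it.
 */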
static bool gpir_lower_neg(gpir_block *block, gpir_node *node)
{
   gpir_alu_node *neg = gpir_node_to_alu(node);
   gpir_node *child = neg->children[0];

   /* check if the child can absorb the negate into its destination */
   if (child->type == gpir_node_type_alu) {
      /* the negate must be the child's only successor */
      if (list_is_singular(&child->succ_list) &&
          gpir_op_infos[child->op].dest_neg) {
         gpir_alu_node *alu = gpir_node_to_alu(child);
         alu->dest_negate = !alu->dest_negate;

         gpir_node_replace_succ(child, node);
         gpir_node_delete(node);
         return true;
      }
   }

   /* otherwise, try to fold the negate into the source modifier of each
    * ALU successor */
   gpir_node_foreach_succ_safe(node, dep) {
      gpir_node *succ = dep->succ;
      if (succ->type != gpir_node_type_alu)
         continue;

      bool success = true;
      gpir_alu_node *alu = gpir_node_to_alu(dep->succ);
      for (int i = 0; i < alu->num_child; i++) {
         if (alu->children[i] == node) {
            if (gpir_op_infos[succ->op].src_neg[i]) {
               alu->children_negate[i] = !alu->children_negate[i];
               alu->children[i] = child;
            }
            else
               success = false;
         }
      }

      if (success)
         gpir_node_replace_pred(dep, child);
   }

   /* delete the neg node once no successors are left */
   if (gpir_node_is_root(node))
      gpir_node_delete(node);

   return true;
}

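/* Lower rcp/rsqrt/exp2/log2 into the hardware's complex-unit sequence:
 * the matching *_impl op plus complex2 and complex1 (roughly speaking,
 * the impl op produces an initial estimate that complex1/complex2
 * refine). exp2 and log2 additionally need a preexp2/postlog2 fixup on
 * the input/output.
 */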
static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
{
   gpir_alu_node *alu = gpir_node_to_alu(node);
   gpir_node *child = alu->children[0];

   if (node->op == gpir_op_exp2) {
      gpir_alu_node *preexp2 = gpir_node_create(block, gpir_op_preexp2);
      if (unlikely(!preexp2))
         return false;

      preexp2->children[0] = child;
      preexp2->num_child = 1;
      gpir_node_add_dep(&preexp2->node, child, GPIR_DEP_INPUT);
      list_addtail(&preexp2->node.list, &node->list);

      child = &preexp2->node;
   }

   gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2);
   if (unlikely(!complex2))
      return false;

   complex2->children[0] = child;
   complex2->num_child = 1;
   gpir_node_add_dep(&complex2->node, child, GPIR_DEP_INPUT);
   list_addtail(&complex2->node.list, &node->list);

   int impl_op = 0;
   switch (node->op) {
   case gpir_op_rcp:
      impl_op = gpir_op_rcp_impl;
      break;
   case gpir_op_rsqrt:
      impl_op = gpir_op_rsqrt_impl;
      break;
   case gpir_op_exp2:
      impl_op = gpir_op_exp2_impl;
      break;
   case gpir_op_log2:
      impl_op = gpir_op_log2_impl;
      break;
   default:
      assert(0);
   }

   gpir_alu_node *impl = gpir_node_create(block, impl_op);
   if (unlikely(!impl))
      return false;

   impl->children[0] = child;
   impl->num_child = 1;
   gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT);
   list_addtail(&impl->node.list, &node->list);

   gpir_alu_node *complex1 = gpir_node_create(block, gpir_op_complex1);
   if (unlikely(!complex1))
      return false;

   complex1->children[0] = &impl->node;
   complex1->children[1] = &complex2->node;
   complex1->children[2] = child;
   complex1->num_child = 3;
   gpir_node_add_dep(&complex1->node, child, GPIR_DEP_INPUT);
   gpir_node_add_dep(&complex1->node, &impl->node, GPIR_DEP_INPUT);
   gpir_node_add_dep(&complex1->node, &complex2->node, GPIR_DEP_INPUT);
   list_addtail(&complex1->node.list, &node->list);

   gpir_node *result = &complex1->node;

   if (node->op == gpir_op_log2) {
      gpir_alu_node *postlog2 = gpir_node_create(block, gpir_op_postlog2);
      if (unlikely(!postlog2))
         return false;

      postlog2->children[0] = result;
      postlog2->num_child = 1;
      gpir_node_add_dep(&postlog2->node, result, GPIR_DEP_INPUT);
      list_addtail(&postlog2->node.list, &node->list);

      result = &postlog2->node;
   }

   gpir_node_replace_succ(result, node);
   gpir_node_delete(node);

   return true;
}

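/* Wrap every node that may occupy two instruction slots in a
 * dummy_m/dummy_f pair; see the comment below for how this helps value
 * register allocation.
 */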
static bool gpir_lower_node_may_consume_two_slots(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (gpir_op_infos[node->op].may_consume_two_slots) {
            /* dummy_f/m are auxiliary nodes for value register allocation:
             * 1. Before register allocation, create fake nodes dummy_f and
             *    dummy_m so the tree becomes: (dummy_m (node dummy_f)).
             *    dummy_m can be spilled, but the other nodes in the tree
             *    can't.
             * 2. After register allocation and fake-dep insertion, merge
             *    all deps of dummy_m and dummy_f into node and remove
             *    dummy_m and dummy_f.
             *
             * Alternatively we could skip dummy_f/m and allocate two value
             * registers for the node, but then we would have to guarantee
             * two free slots after the node's successors, even though one
             * slot is enough to schedule it (a move can stand in for the
             * two-slot node). The spill case is also harder to handle with
             * the two-register approach.
             *
             * With the dummy_f/m method there is no such requirement: the
             * node is scheduled directly when two slots are available and
             * through a move otherwise, and it can be spilled with a
             * single register.
             */
            gpir_node *dummy_m = gpir_node_create(block, gpir_op_dummy_m);
            if (unlikely(!dummy_m))
               return false;
            list_add(&dummy_m->list, &node->list);

            gpir_node *dummy_f = gpir_node_create(block, gpir_op_dummy_f);
            if (unlikely(!dummy_f))
               return false;
            list_add(&dummy_f->list, &node->list);

            gpir_alu_node *alu = gpir_node_to_alu(dummy_m);
            alu->children[0] = node;
            alu->children[1] = dummy_f;
            alu->num_child = 2;

            gpir_node_replace_succ(dummy_m, node);
            gpir_node_add_dep(dummy_m, node, GPIR_DEP_INPUT);
            gpir_node_add_dep(dummy_m, dummy_f, GPIR_DEP_INPUT);
         }
      }
   }

   return true;
}

/*
 * There are no 'equal' or 'not-equal' opcodes.
 * eq (a == b) is lowered to and(a >= b, b >= a)
 * ne (a != b) is lowered to or(a < b, b < a)
 * Since comparison results are 0.0 or 1.0, 'and' can be implemented as
 * min and 'or' as max.
 */
static bool gpir_lower_eq_ne(gpir_block *block, gpir_node *node)
{
   gpir_op cmp_node_op;
   gpir_op node_new_op;
   switch (node->op) {
   case gpir_op_eq:
      cmp_node_op = gpir_op_ge;
      node_new_op = gpir_op_min; /* and */
      break;
   case gpir_op_ne:
      cmp_node_op = gpir_op_lt;
      node_new_op = gpir_op_max; /* or */
      break;
   default:
      unreachable("bad node op");
   }

   gpir_alu_node *e = gpir_node_to_alu(node);

   gpir_alu_node *cmp1 = gpir_node_create(block, cmp_node_op);
   if (unlikely(!cmp1))
      return false;
   list_addtail(&cmp1->node.list, &node->list);
   gpir_alu_node *cmp2 = gpir_node_create(block, cmp_node_op);
   if (unlikely(!cmp2))
      return false;
   list_addtail(&cmp2->node.list, &node->list);

   cmp1->children[0] = e->children[0];
   cmp1->children[1] = e->children[1];
   cmp1->num_child = 2;

   cmp2->children[0] = e->children[1];
   cmp2->children[1] = e->children[0];
   cmp2->num_child = 2;

   gpir_node_add_dep(&cmp1->node, e->children[0], GPIR_DEP_INPUT);
   gpir_node_add_dep(&cmp1->node, e->children[1], GPIR_DEP_INPUT);

   gpir_node_add_dep(&cmp2->node, e->children[0], GPIR_DEP_INPUT);
   gpir_node_add_dep(&cmp2->node, e->children[1], GPIR_DEP_INPUT);

   gpir_node_foreach_pred_safe(node, dep) {
      gpir_node_remove_dep(node, dep->pred);
   }

   gpir_node_add_dep(node, &cmp1->node, GPIR_DEP_INPUT);
   gpir_node_add_dep(node, &cmp2->node, GPIR_DEP_INPUT);

   node->op = node_new_op;
   e->children[0] = &cmp1->node;
   e->children[1] = &cmp2->node;
   e->num_child = 2;

   return true;
}

/*
 * There is no 'abs' opcode.
 * abs(a) is lowered to max(a, -a)
 */
static bool gpir_lower_abs(gpir_block *block, gpir_node *node)
{
   gpir_alu_node *alu = gpir_node_to_alu(node);

   assert(node->op == gpir_op_abs);

   node->op = gpir_op_max;

   alu->children[1] = alu->children[0];
   alu->children_negate[1] = true;
   alu->num_child = 2;

   return true;
}

/*
 * There is no 'not' opcode.
 * not(a) is lowered to add(1, -a)
 */
static bool gpir_lower_not(gpir_block *block, gpir_node *node)
{
   gpir_alu_node *alu = gpir_node_to_alu(node);

   assert(alu->node.op == gpir_op_not);

   node->op = gpir_op_add;

   gpir_node *node_const = gpir_node_create(block, gpir_op_const);
   if (unlikely(!node_const))
      return false;

   gpir_const_node *c = gpir_node_to_const(node_const);

   assert(c->node.op == gpir_op_const);

   list_addtail(&c->node.list, &node->list);
   c->value.f = 1.0f;
   gpir_node_add_dep(&alu->node, &c->node, GPIR_DEP_INPUT);

   alu->children_negate[1] = !alu->children_negate[0];
   alu->children[1] = alu->children[0];
   alu->children[0] = &c->node;
   alu->num_child = 2;

   return true;
}

/* There is no unconditional branch instruction, so we have to lower it to a
 * conditional branch with a condition of 1.0.
 */

static bool gpir_lower_branch_uncond(gpir_block *block, gpir_node *node)
{
   gpir_branch_node *branch = gpir_node_to_branch(node);

   gpir_node *node_const = gpir_node_create(block, gpir_op_const);
   if (unlikely(!node_const))
      return false;

   gpir_const_node *c = gpir_node_to_const(node_const);

   list_addtail(&c->node.list, &node->list);
   c->value.f = 1.0f;
   gpir_node_add_dep(&branch->node, &c->node, GPIR_DEP_INPUT);

   branch->node.op = gpir_op_branch_cond;
   branch->cond = node_const;

   return true;
}

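/* Per-opcode lowering hooks, run on every node before the register
 * scheduler; opcodes with a NULL entry need no lowering.
 */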
static bool (*gpir_pre_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = {
   [gpir_op_not] = gpir_lower_not,
   [gpir_op_neg] = gpir_lower_neg,
   [gpir_op_rcp] = gpir_lower_complex,
   [gpir_op_rsqrt] = gpir_lower_complex,
   [gpir_op_exp2] = gpir_lower_complex,
   [gpir_op_log2] = gpir_lower_complex,
   [gpir_op_eq] = gpir_lower_eq_ne,
   [gpir_op_ne] = gpir_lower_eq_ne,
   [gpir_op_abs] = gpir_lower_abs,
   [gpir_op_branch_uncond] = gpir_lower_branch_uncond,
};
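
/* Entry point: apply the per-opcode lowerings above to every node, then
 * run the whole-program passes for constants, loads and two-slot nodes.
 */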
bool gpir_pre_rsched_lower_prog(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (gpir_pre_rsched_lower_funcs[node->op] &&
             !gpir_pre_rsched_lower_funcs[node->op](block, node))
            return false;
      }
   }

   if (!gpir_lower_const(comp))
      return false;

   if (!gpir_lower_load(comp))
      return false;

   if (!gpir_lower_node_may_consume_two_slots(comp))
      return false;

   gpir_debug("pre rsched lower prog\n");
   gpir_node_print_prog_seq(comp);
   return true;
}