1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/bitscan.h"
26 #include "util/ralloc.h"
27
28 #include "ppir.h"
29
/* Lower a const node.
 * - A const with no successors is dead and simply deleted.
 * - A const consumed by an ALU or branch node is converted to write the
 *   ^const0 pipeline register, and all matching sources of the consumer
 *   are retargeted to read it from there.
 * - Any other consumer gets a mov inserted so the const still flows
 *   through ^const0.
 * Returns false only on allocation failure of the inserted mov. */
static bool ppir_lower_const(ppir_block *block, ppir_node *node)
{
   /* Unused const: dead code, drop it. */
   if (ppir_node_is_root(node)) {
      ppir_node_delete(node);
      return true;
   }

   assert(ppir_node_has_single_succ(node));

   ppir_node *succ = ppir_node_first_succ(node);
   ppir_dest *dest = ppir_node_get_dest(node);

   switch (succ->type) {
   case ppir_node_type_alu:
   case ppir_node_type_branch:
      /* ALU and branch can consume consts directly */
      dest->type = ppir_target_pipeline;
      /* Reg will be updated in node_to_instr later */
      dest->pipeline = ppir_pipeline_reg_const0;

      /* single succ can still have multiple references to this node */
      for (int i = 0; i < ppir_node_get_src_num(succ); i++) {
         ppir_src *src = ppir_node_get_src(succ, i);
         if (src && src->node == node) {
            src->type = ppir_target_pipeline;
            src->pipeline = ppir_pipeline_reg_const0;
         }
      }
      return true;
   default:
      /* Create a move for everyone else */
      break;
   }

   ppir_node *move = ppir_node_insert_mov(node);
   if (unlikely(!move))
      return false;

   ppir_debug("lower const create move %d for %d\n",
              move->index, node->index);

   /* Need to be careful with changing src/dst type here:
    * it has to be done *after* successors have their children
    * replaced, otherwise ppir_node_replace_child() won't find
    * matching src/dst and as result won't work
    */
   ppir_src *mov_src = ppir_node_get_src(move, 0);
   mov_src->type = dest->type = ppir_target_pipeline;
   mov_src->pipeline = dest->pipeline = ppir_pipeline_reg_const0;

   return true;
}
82
ppir_lower_swap_args(ppir_block * block,ppir_node * node)83 static bool ppir_lower_swap_args(ppir_block *block, ppir_node *node)
84 {
85 /* swapped op must be the next op */
86 node->op++;
87
88 assert(node->type == ppir_node_type_alu);
89 ppir_alu_node *alu = ppir_node_to_alu(node);
90 assert(alu->num_src == 2);
91
92 ppir_src tmp = alu->src[0];
93 alu->src[0] = alu->src[1];
94 alu->src[1] = tmp;
95 return true;
96 }
97
/* Lower a load (uniform/temp) node.
 * - A dead SSA load with all successors in this block is deleted.
 * - When the load's consumer(s) are a single ALU/branch node in this
 *   block, the load is retargeted to the ^uniform pipeline register so
 *   it can be consumed directly.
 * - Otherwise a mov is inserted to read the load through ^uniform.
 * Returns false only on allocation failure of the inserted mov. */
static bool ppir_lower_load(ppir_block *block, ppir_node *node)
{
   ppir_dest *dest = ppir_node_get_dest(node);
   /* Unused SSA load whose (absent) successors are all local: drop it. */
   if (ppir_node_is_root(node) && !node->succ_different_block &&
       dest->type == ppir_target_ssa) {
      ppir_node_delete(node);
      return true;
   }

   /* load can have multiple successors in case if we duplicated load node
    * that has load node in source
    */
   if ((ppir_node_has_single_src_succ(node) || ppir_node_is_root(node)) &&
       !node->succ_different_block &&
       dest->type != ppir_target_register) {
      ppir_node *succ = ppir_node_first_succ(node);
      switch (succ->type) {
      case ppir_node_type_alu:
      case ppir_node_type_branch: {
         /* single succ can still have multiple references to this node */
         for (int i = 0; i < ppir_node_get_src_num(succ); i++) {
            ppir_src *src = ppir_node_get_src(succ, i);
            if (src && src->node == node) {
               /* Can consume uniforms directly */
               src->type = dest->type = ppir_target_pipeline;
               src->pipeline = dest->pipeline = ppir_pipeline_reg_uniform;
            }
         }
         return true;
      }
      default:
         /* Create mov for everyone else */
         break;
      }
   }

   /* Fallback: route the loaded value through a mov via ^uniform. */
   ppir_node *move = ppir_node_insert_mov(node);
   if (unlikely(!move))
      return false;

   ppir_src *mov_src = ppir_node_get_src(move, 0);
   mov_src->type = dest->type = ppir_target_pipeline;
   mov_src->pipeline = dest->pipeline = ppir_pipeline_reg_uniform;

   return true;
}
144
ppir_lower_ddxy(ppir_block * block,ppir_node * node)145 static bool ppir_lower_ddxy(ppir_block *block, ppir_node *node)
146 {
147 assert(node->type == ppir_node_type_alu);
148 ppir_alu_node *alu = ppir_node_to_alu(node);
149
150 alu->src[1] = alu->src[0];
151 if (node->op == ppir_op_ddx)
152 alu->src[1].negate = !alu->src[1].negate;
153 else if (node->op == ppir_op_ddy)
154 alu->src[0].negate = !alu->src[0].negate;
155 else
156 assert(0);
157
158 alu->num_src = 2;
159
160 return true;
161 }
162
/* Lower a texture load: route its result through the ^sampler pipeline
 * register, either by retargeting its single SSA consumer directly or,
 * failing that, by inserting a mov that reads ^sampler.
 * Returns false only on allocation failure of the inserted mov. */
static bool ppir_lower_texture(ppir_block *block, ppir_node *node)
{
   ppir_dest *dest = ppir_node_get_dest(node);

   if (ppir_node_has_single_succ(node) && dest->type == ppir_target_ssa) {
      ppir_node *succ = ppir_node_first_succ(node);
      dest->type = ppir_target_pipeline;
      dest->pipeline = ppir_pipeline_reg_sampler;

      /* single succ can still reference this node through several srcs */
      for (int i = 0; i < ppir_node_get_src_num(succ); i++) {
         ppir_src *src = ppir_node_get_src(succ, i);
         if (src && src->node == node) {
            src->type = ppir_target_pipeline;
            src->pipeline = ppir_pipeline_reg_sampler;
         }
      }
      return true;
   }

   /* Create move node as fallback */
   ppir_node *move = ppir_node_insert_mov(node);
   if (unlikely(!move))
      return false;

   ppir_debug("lower texture create move %d for %d\n",
              move->index, node->index);

   ppir_src *mov_src = ppir_node_get_src(move, 0);
   mov_src->type = dest->type = ppir_target_pipeline;
   mov_src->pipeline = dest->pipeline = ppir_pipeline_reg_sampler;

   return true;
}
196
/* Check if the select condition and ensure it can be inserted to
 * the scalar mul slot */
static bool ppir_lower_select(ppir_block *block, ppir_node *node)
{
   ppir_alu_node *alu = ppir_node_to_alu(node);
   ppir_src *src0 = &alu->src[0];   /* the condition */
   ppir_src *src1 = &alu->src[1];
   ppir_src *src2 = &alu->src[2];

   /* If the condition is already an alu scalar whose only successor
    * is the select node, just turn it into pipeline output. */
   /* The (src2->node == cond) case is a tricky exception.
    * The reason is that we must force cond to output to ^fmul -- but
    * then it no longer writes to a register and it is impossible to
    * reference ^fmul in src2. So in that exceptional case, also fall
    * back to the mov. */
   ppir_node *cond = src0->node;
   if (cond &&
       cond->type == ppir_node_type_alu &&
       ppir_node_has_single_succ(cond) &&
       ppir_target_is_scalar(ppir_node_get_dest(cond)) &&
       ppir_node_schedulable_slot(cond, PPIR_INSTR_SLOT_ALU_SCL_MUL) &&
       src2->node != cond) {

      /* Redirect the condition's output into ^fmul. */
      ppir_dest *cond_dest = ppir_node_get_dest(cond);
      cond_dest->type = ppir_target_pipeline;
      cond_dest->pipeline = ppir_pipeline_reg_fmul;

      ppir_node_target_assign(src0, cond);

      /* src1 could also be a reference from the same node as
       * the condition, so update it in that case. */
      if (src1->node && src1->node == cond)
         ppir_node_target_assign(src1, cond);

      return true;
   }

   /* If the condition can't be used for any reason, insert a mov
    * so that the condition can end up in ^fmul */
   ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0);
   if (!move)
      return false;
   list_addtail(&move->list, &node->list);

   /* The mov reads the original condition (scalar: only the first
    * swizzle component matters). */
   ppir_alu_node *move_alu = ppir_node_to_alu(move);
   ppir_src *move_src = move_alu->src;
   move_src->type = src0->type;
   move_src->ssa = src0->ssa;
   move_src->swizzle[0] = src0->swizzle[0];
   move_alu->num_src = 1;

   /* The mov writes straight to ^fmul for the select to consume. */
   ppir_dest *move_dest = &move_alu->dest;
   move_dest->type = ppir_target_pipeline;
   move_dest->pipeline = ppir_pipeline_reg_fmul;
   move_dest->write_mask = 1;

   /* Splice the mov into the dep graph between the condition producer
    * (if there is one) and the select node. */
   ppir_node *pred = src0->node;
   ppir_dep *dep = ppir_dep_for_pred(node, pred);
   if (dep)
      ppir_node_replace_pred(dep, move);
   else
      ppir_node_add_dep(node, move, ppir_dep_src);

   /* pred can be a register */
   if (pred)
      ppir_node_add_dep(move, pred, ppir_dep_src);

   ppir_node_target_assign(src0, move);

   /* src1 could also be a reference from the same node as
    * the condition, so update it in that case. */
   if (src1->node && src1->node == pred)
      ppir_node_target_assign(src1, move);

   return true;
}
274
ppir_lower_trunc(ppir_block * block,ppir_node * node)275 static bool ppir_lower_trunc(ppir_block *block, ppir_node *node)
276 {
277 /* Turn it into a mov with a round to integer output modifier */
278 ppir_alu_node *alu = ppir_node_to_alu(node);
279 ppir_dest *move_dest = &alu->dest;
280 move_dest->modifier = ppir_outmod_round;
281 node->op = ppir_op_mov;
282
283 return true;
284 }
285
ppir_lower_abs(ppir_block * block,ppir_node * node)286 static bool ppir_lower_abs(ppir_block *block, ppir_node *node)
287 {
288 /* Turn it into a mov and set the absolute modifier */
289 ppir_alu_node *alu = ppir_node_to_alu(node);
290
291 assert(alu->num_src == 1);
292
293 alu->src[0].absolute = true;
294 alu->src[0].negate = false;
295 node->op = ppir_op_mov;
296
297 return true;
298 }
299
ppir_lower_neg(ppir_block * block,ppir_node * node)300 static bool ppir_lower_neg(ppir_block *block, ppir_node *node)
301 {
302 /* Turn it into a mov and set the negate modifier */
303 ppir_alu_node *alu = ppir_node_to_alu(node);
304
305 assert(alu->num_src == 1);
306
307 alu->src[0].negate = !alu->src[0].negate;
308 node->op = ppir_op_mov;
309
310 return true;
311 }
312
ppir_lower_sat(ppir_block * block,ppir_node * node)313 static bool ppir_lower_sat(ppir_block *block, ppir_node *node)
314 {
315 /* Turn it into a mov with the saturate output modifier */
316 ppir_alu_node *alu = ppir_node_to_alu(node);
317
318 assert(alu->num_src == 1);
319
320 ppir_dest *move_dest = &alu->dest;
321 move_dest->modifier = ppir_outmod_clamp_fraction;
322 node->op = ppir_op_mov;
323
324 return true;
325 }
326
/* Try to fold a comparison node directly into a conditional branch,
 * removing the need for a select instruction.  Only possible when the
 * branch's single predecessor is a simple ALU comparison whose SSA
 * result is used only by this branch and whose sources carry no
 * pipeline/flag modifiers.  Returns true if the merge happened. */
static bool ppir_lower_branch_merge_condition(ppir_block *block, ppir_node *node)
{
   /* Check if we can merge a condition with a branch instruction,
    * removing the need for a select instruction */
   assert(node->type == ppir_node_type_branch);

   if (!ppir_node_has_single_pred(node))
      return false;

   ppir_node *pred = ppir_node_first_pred(node);
   assert(pred);

   if (pred->type != ppir_node_type_alu)
      return false;

   /* only plain comparisons can be folded into branch condition bits */
   switch (pred->op)
   {
      case ppir_op_lt:
      case ppir_op_gt:
      case ppir_op_le:
      case ppir_op_ge:
      case ppir_op_eq:
      case ppir_op_ne:
         break;
      default:
         return false;
   }

   ppir_dest *dest = ppir_node_get_dest(pred);
   if (!ppir_node_has_single_succ(pred) || dest->type != ppir_target_ssa)
      return false;

   ppir_alu_node *cond = ppir_node_to_alu(pred);
   /* branch can't reference pipeline registers */
   if (cond->src[0].type == ppir_target_pipeline ||
       cond->src[1].type == ppir_target_pipeline)
      return false;

   /* branch can't use flags */
   if (cond->src[0].negate || cond->src[0].absolute ||
       cond->src[1].negate || cond->src[1].absolute)
      return false;

   /* at this point, it can be successfully be replaced. */
   /* The branch hardware tests (lt, eq, gt) against src[1] - src[0],
    * so each comparison maps onto the complementary set of cond bits. */
   ppir_branch_node *branch = ppir_node_to_branch(node);
   switch (pred->op)
   {
      case ppir_op_le:
         branch->cond_gt = true;
         break;
      case ppir_op_lt:
         branch->cond_eq = true;
         branch->cond_gt = true;
         break;
      case ppir_op_ge:
         branch->cond_lt = true;
         break;
      case ppir_op_gt:
         branch->cond_eq = true;
         branch->cond_lt = true;
         break;
      case ppir_op_eq:
         branch->cond_lt = true;
         branch->cond_gt = true;
         break;
      case ppir_op_ne:
         branch->cond_eq = true;
         break;
      default:
         assert(0);
         break;
   }

   assert(cond->num_src == 2);

   /* the branch now compares the comparison's operands itself */
   branch->num_src = 2;
   branch->src[0] = cond->src[0];
   branch->src[1] = cond->src[1];

   /* for all nodes before the condition */
   ppir_node_foreach_pred_safe(pred, dep) {
      /* insert the branch node as successor */
      ppir_node *p = dep->pred;
      ppir_node_remove_dep(dep);
      ppir_node_add_dep(node, p, ppir_dep_src);
   }

   ppir_node_delete(pred);

   return true;
}
418
/* Lower a branch node: unconditional branches pass through untouched;
 * conditional ones either merge their comparison into the branch
 * itself, or fall back to comparing the condition against an inserted
 * const 0 via the ^const0 pipeline register.
 * Returns false only on allocation failure of the const node. */
static bool ppir_lower_branch(ppir_block *block, ppir_node *node)
{
   ppir_branch_node *branch = ppir_node_to_branch(node);

   /* Unconditional branch */
   if (branch->num_src == 0)
      return true;

   /* Check if we can merge a condition with the branch */
   if (ppir_lower_branch_merge_condition(block, node))
      return true;

   /* If the condition cannot be merged, fall back to a
    * comparison against zero */
   ppir_const_node *zero = ppir_node_create(block, ppir_op_const, -1, 0);

   if (!zero)
      return false;

   /* scalar constant 0.0 that lives in the ^const0 pipeline register */
   zero->constant.value[0].f = 0;
   zero->constant.num = 1;
   zero->dest.type = ppir_target_pipeline;
   zero->dest.pipeline = ppir_pipeline_reg_const0;
   zero->dest.ssa.num_components = 1;
   zero->dest.write_mask = 0x01;

   /* branch becomes: compare existing src[0] (the condition) with 0 */
   ppir_node_target_assign(&branch->src[1], &zero->node);

   /* negated: take the branch when cond == 0; otherwise when cond != 0 */
   if (branch->negate)
      branch->cond_eq = true;
   else {
      branch->cond_gt = true;
      branch->cond_lt = true;
   }

   branch->num_src = 2;

   ppir_node_add_dep(&branch->node, &zero->node, ppir_dep_src);
   list_addtail(&zero->node.list, &node->list);

   return true;
}
461
ppir_lower_accum(ppir_block * block,ppir_node * node)462 static bool ppir_lower_accum(ppir_block *block, ppir_node *node)
463 {
464 /* If the last argument of a node placed in PPIR_INSTR_SLOT_ALU_SCL_ADD
465 * (or PPIR_INSTR_SLOT_ALU_VEC_ADD) is placed in
466 * PPIR_INSTR_SLOT_ALU_SCL_MUL (or PPIR_INSTR_SLOT_ALU_VEC_MUL) we cannot
467 * save a register (and an instruction) by using a pipeline register.
468 * Therefore it is interesting to make sure arguments of that type are
469 * the first argument by swapping arguments (if possible) */
470 ppir_alu_node *alu = ppir_node_to_alu(node);
471
472 assert(alu->num_src >= 2);
473
474 if (alu->src[0].type == ppir_target_pipeline)
475 return true;
476
477 if (alu->src[0].type == ppir_target_ssa) {
478 int *src_0_slots = ppir_op_infos[alu->src[0].node->op].slots;
479 if (src_0_slots) {
480 for (int i = 0; src_0_slots[i] != PPIR_INSTR_SLOT_END; i++) {
481 if ((src_0_slots[i] == PPIR_INSTR_SLOT_ALU_SCL_MUL) ||
482 (src_0_slots[i] == PPIR_INSTR_SLOT_ALU_VEC_MUL)) {
483 return true;
484 }
485 }
486 }
487 }
488
489 int src_to_swap = -1;
490 for (int j = 1; j < alu->num_src; j++) {
491 if (alu->src[j].type != ppir_target_ssa)
492 continue;
493 int *src_slots = ppir_op_infos[alu->src[j].node->op].slots;
494 if (!src_slots)
495 continue;
496 for (int i = 0; src_slots[i] != PPIR_INSTR_SLOT_END; i++) {
497 if ((src_slots[i] == PPIR_INSTR_SLOT_ALU_SCL_MUL) ||
498 (src_slots[i] == PPIR_INSTR_SLOT_ALU_VEC_MUL)) {
499 src_to_swap = j;
500 break;
501 }
502 }
503 if (src_to_swap > 0)
504 break;
505 }
506
507 if (src_to_swap < 0)
508 return true;
509
510 /* Swap arguments so that we can use a pipeline register later on */
511 ppir_src tmp = alu->src[0];
512 alu->src[0] = alu->src[src_to_swap];
513 alu->src[src_to_swap] = tmp;
514
515 return true;
516 }
517
/* Per-op lowering dispatch table, indexed by ppir_op.  Ops without an
 * entry (NULL) need no lowering. */
static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
   [ppir_op_abs] = ppir_lower_abs,
   [ppir_op_neg] = ppir_lower_neg,
   [ppir_op_const] = ppir_lower_const,
   [ppir_op_ddx] = ppir_lower_ddxy,
   [ppir_op_ddy] = ppir_lower_ddxy,
   [ppir_op_lt] = ppir_lower_swap_args,
   [ppir_op_le] = ppir_lower_swap_args,
   [ppir_op_load_texture] = ppir_lower_texture,
   [ppir_op_select] = ppir_lower_select,
   [ppir_op_trunc] = ppir_lower_trunc,
   [ppir_op_sat] = ppir_lower_sat,
   [ppir_op_branch] = ppir_lower_branch,
   [ppir_op_load_uniform] = ppir_lower_load,
   [ppir_op_load_temp] = ppir_lower_load,
   [ppir_op_add] = ppir_lower_accum,
   [ppir_op_max] = ppir_lower_accum,
   [ppir_op_min] = ppir_lower_accum,
   [ppir_op_eq] = ppir_lower_accum,
   [ppir_op_ne] = ppir_lower_accum,
};
539
ppir_lower_prog(ppir_compiler * comp)540 bool ppir_lower_prog(ppir_compiler *comp)
541 {
542 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
543 list_for_each_entry_safe(ppir_node, node, &block->node_list, list) {
544 if (ppir_lower_funcs[node->op] &&
545 !ppir_lower_funcs[node->op](block, node))
546 return false;
547 }
548 }
549
550 return true;
551 }
552