/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include <string.h>

#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/bitscan.h"
#include "compiler/nir/nir.h"
#include "pipe/p_state.h"

#include "ppir.h"

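/* Helpers for creating ppir nodes from NIR destinations. An SSA-destined
 * node gets a write mask covering all of its components; "is_head" is set
 * for load/store results, presumably because PP load/store ops cannot
 * write at an arbitrary component offset within a register. */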
static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
{
   ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);
   dest->type = ppir_target_ssa;
   dest->ssa.num_components = ssa->num_components;
   dest->write_mask = u_bit_consecutive(0, ssa->num_components);

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->ssa.is_head = true;

   return node;
}

static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_register *reg, unsigned mask)
{
   ppir_node *node = ppir_node_create(block, op, reg->index, mask);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);

   list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
      if (r->index == reg->index) {
         dest->reg = r;
         break;
      }
   }

   dest->type = ppir_target_register;
   dest->write_mask = mask;

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->reg->is_head = true;

   return node;
}

static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
                                   nir_dest *dest, unsigned mask)
{
   unsigned index = -1;

   if (dest) {
      if (dest->is_ssa)
         return ppir_node_create_ssa(block, op, &dest->ssa);
      else
         return ppir_node_create_reg(block, op, dest->reg.reg, mask);
   }

   return ppir_node_create(block, op, index, 0);
}

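/* Resolve a NIR source to the ppir node(s) that produce it and record the
 * dependency. SSA defs map 1:1 into var_nodes; NIR registers occupy four
 * consecutive var_nodes slots starting at reg_base (one per component), so
 * each component read may depend on a different writer node. */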
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      child = comp->var_nodes[ns->ssa->index];
      if (child->op != ppir_op_undef)
         ppir_node_add_dep(node, child, ppir_dep_src);
   }
   else {
      nir_register *reg = ns->reg.reg;
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
         /* Reg is read before it is written: create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_dummy, reg,
                                         u_bit_consecutive(0, 4));
            comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_dummy)
            ppir_node_add_dep(node, child, ppir_dep_src);
      }
   }

   assert(child);
   ppir_node_target_assign(ps, child);
}

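/* NIR ALU opcode -> ppir opcode. Entries not listed default to 0, which is
 * expected to be ppir_op_unsupported; ppir_emit_alu rejects those below. */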
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
};

static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *instr = nir_instr_as_alu(ni);
   int op = nir_to_ppir_opcodes[instr->op];

   if (op == ppir_op_unsupported) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
      return false;
   }

   ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
                                               instr->dest.write_mask);
   if (!node)
      return false;

   ppir_dest *pd = &node->dest;
   nir_alu_dest *nd = &instr->dest;
   if (nd->saturate)
      pd->modifier = ppir_outmod_clamp_fraction;

   unsigned src_mask;
   switch (op) {
   case ppir_op_sum3:
      src_mask = 0b0111;
      break;
   case ppir_op_sum4:
      src_mask = 0b1111;
      break;
   default:
      src_mask = pd->write_mask;
      break;
   }

   unsigned num_child = nir_op_infos[instr->op].num_inputs;
   node->num_src = num_child;

   for (int i = 0; i < num_child; i++) {
      nir_alu_src *ns = instr->src + i;
      ppir_src *ps = node->src + i;
      memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
      ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);

      ps->absolute = ns->abs;
      ps->negate = ns->negate;
   }

   list_addtail(&node->node.list, &block->node_list);
   return true;
}

static ppir_block *ppir_block_create(ppir_compiler *comp);

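/* Discard on Utgard PP is implemented by branching to a dedicated block
 * containing a single discard node. The block is created lazily on the
 * first discard_if and appended after all other blocks at the end of
 * compilation. */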
static bool ppir_emit_discard_block(ppir_compiler *comp)
{
   ppir_block *block = ppir_block_create(comp);
   ppir_discard_node *discard;
   if (!block)
      return false;

   comp->discard_block = block;
   block->comp = comp;

   discard = ppir_node_create(block, ppir_op_discard, -1, 0);
   if (discard)
      list_addtail(&discard->node.list, &block->node_list);
   else
      return false;

   return true;
}

static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;

   if (!comp->discard_block && !ppir_emit_discard_block(comp))
      return NULL;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* second src and condition will be updated during lowering */
   ppir_node_add_src(block->comp, node, &branch->src[0],
                     &instr->src[0], u_bit_consecutive(0, instr->num_components));
   branch->num_src = 1;
   branch->target = comp->discard_block;

   return node;
}

static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
{
   ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);

   return node;
}

static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }
      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         unreachable("bad intrinsic");
         break;
      }

      lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_load_uniform:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }

      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_store_output: {
      /* In simple cases where the store_output source is SSA, the node
       * producing it can be directly marked as the output.
       * If discard is used or the source is not SSA, things can get a
       * lot more complicated, so don't try to optimize those and fall
       * back to inserting a mov at the end.
       * If the source node will only be able to output to pipeline
       * registers, fall back to the mov as well. */
      assert(nir_src_is_const(instr->src[1]) &&
             "lima doesn't support indirect outputs");

      nir_io_semantics io = nir_intrinsic_io_semantics(instr);
      unsigned offset = nir_src_as_uint(instr->src[1]);
      unsigned slot = io.location + offset;
      ppir_output_type out_type = ppir_nir_output_to_ppir(slot,
         block->comp->dual_source_blend ? io.dual_source_blend_index : 0);
      if (out_type == ppir_output_invalid) {
         ppir_debug("Unsupported output type: %d\n", slot);
         return false;
      }

      if (!block->comp->uses_discard && instr->src->is_ssa) {
         node = block->comp->var_nodes[instr->src->ssa->index];
         switch (node->op) {
         case ppir_op_load_uniform:
         case ppir_op_load_texture:
         case ppir_op_const:
            break;
         default: {
            ppir_dest *dest = ppir_node_get_dest(node);
            dest->ssa.out_type = out_type;
            node->is_out = 1;
            return true;
         }
         }
      }

      alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
      if (!alu_node)
         return false;

      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = instr->num_components;
      dest->ssa.index = 0;
      dest->write_mask = u_bit_consecutive(0, instr->num_components);
      dest->ssa.out_type = out_type;

      alu_node->num_src = 1;

      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
                        u_bit_consecutive(0, instr->num_components));

      alu_node->node.is_out = 1;

      list_addtail(&alu_node->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_discard:
      node = ppir_emit_discard(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   case nir_intrinsic_discard_if:
      node = ppir_emit_discard_if(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return false;
   }
}

static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni)
{
   nir_load_const_instr *instr = nir_instr_as_load_const(ni);
   ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
   if (!node)
      return false;

   assert(instr->def.bit_size == 32);

   for (int i = 0; i < instr->def.num_components; i++)
      node->constant.value[i].i = instr->value[i].i32;
   node->constant.num = instr->def.num_components;

   list_addtail(&node->node.list, &block->node_list);
   return true;
}

static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
   nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni);
   ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
   if (!node)
      return false;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   dest->ssa.undef = true;

   list_addtail(&node->list, &block->node_list);
   return true;
}

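/* Texture sampling on the PP is split in two: a load_coords node feeds the
 * coordinates through a pipeline register, and the ld_tex node then reads
 * that pipeline register. A varying load feeding the sample can be promoted
 * to load_coords directly; otherwise a separate load_coords_reg node is
 * created after emission below. */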
static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      ppir_error("unsupported texop %d\n", instr->op);
      return false;
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return false;
   }

   /* emit ld_tex node */

   unsigned mask = 0;
   if (!instr->dest.is_ssa)
      mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
   if (!node)
      return false;

   node->sampler = instr->texture_index;
   node->sampler_dim = instr->sampler_dim;

   for (int i = 0; i < instr->coord_components; i++)
      node->src[0].swizzle[i] = i;

   bool perspective = false;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_backend1:
         perspective = true;
         FALLTHROUGH;
      case nir_tex_src_coord: {
         nir_src *ns = &instr->src[i].src;
         if (ns->is_ssa) {
            ppir_node *child = block->comp->var_nodes[ns->ssa->index];
            if (child->op == ppir_op_load_varying) {
               /* If the successor is load_texture, promote it to load_coords */
               nir_tex_src *nts = (nir_tex_src *)ns;
               if (nts->src_type == nir_tex_src_coord ||
                   nts->src_type == nir_tex_src_backend1)
                  child->op = ppir_op_load_coords;
            }
         }

         /* src[0] is not used by the ld_tex instruction but ensures
          * correct scheduling due to the pipeline dependency */
         ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         node->num_src++;
         break;
      }
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         node->lod_bias_en = true;
         node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
         ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1);
         node->num_src++;
         break;
      default:
         ppir_error("unsupported texture source type\n");
         return false;
      }
   }

   list_addtail(&node->node.list, &block->node_list);

   /* validate load coords node */

   ppir_node *src_coords = ppir_node_get_src(&node->node, 0)->node;
   ppir_load_node *load = NULL;

   if (src_coords && ppir_node_has_single_src_succ(src_coords) &&
       (src_coords->op == ppir_op_load_coords))
      load = ppir_node_to_load(src_coords);
   else {
      /* Create load_coords node */
      load = ppir_node_create(block, ppir_op_load_coords_reg, -1, 0);
      if (!load)
         return false;
      list_addtail(&load->node.list, &block->node_list);

      load->src = node->src[0];
      load->num_src = 1;
      load->num_components = instr->coord_components;

      ppir_debug("%s create load_coords node %d for %d\n",
                 __FUNCTION__, load->index, node->node.index);

      ppir_node_foreach_pred_safe((&node->node), dep) {
         ppir_node *pred = dep->pred;
         ppir_node_remove_dep(dep);
         ppir_node_add_dep(&load->node, pred, ppir_dep_src);
      }
      ppir_node_add_dep(&node->node, &load->node, ppir_dep_src);
   }

   assert(load);

   if (perspective) {
      if (instr->coord_components == 3)
         load->perspective = ppir_perspective_z;
      else
         load->perspective = ppir_perspective_w;
   }

   load->sampler_dim = instr->sampler_dim;
   node->src[0].type = load->dest.type = ppir_target_pipeline;
   node->src[0].pipeline = load->dest.pipeline = ppir_pipeline_reg_discard;

   return true;
}

static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uintptr_t)nblock);

   return block;
}

static bool ppir_emit_jump(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;
   ppir_block *jump_block;
   nir_jump_instr *jump = nir_instr_as_jump(ni);

   switch (jump->type) {
   case nir_jump_break: {
      assert(comp->current_block->successors[0]);
      assert(!comp->current_block->successors[1]);
      jump_block = comp->current_block->successors[0];
   }
   break;
   case nir_jump_continue:
      jump_block = comp->loop_cont_block;
      break;
   default:
      ppir_error("unsupported nir_jump_instr\n");
      return false;
   }

   assert(jump_block != NULL);

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   branch = ppir_node_to_branch(node);

   /* Unconditional */
   branch->num_src = 0;
   branch->target = jump_block;

   list_addtail(&node->list, &block->node_list);
   return true;
}

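/* Per-instruction emit dispatch. The table is sized nir_instr_type_phi:
 * phi instructions are expected to have been lowered to registers before
 * this pass, and ppir_emit_block asserts instr->type < nir_instr_type_phi. */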
static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu] = ppir_emit_alu,
   [nir_instr_type_intrinsic] = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef,
   [nir_instr_type_tex] = ppir_emit_tex,
   [nir_instr_type_jump] = ppir_emit_jump,
};

static ppir_block *ppir_block_create(ppir_compiler *comp)
{
   ppir_block *block = rzalloc(comp, ppir_block);
   if (!block)
      return NULL;

   list_inithead(&block->node_list);
   list_inithead(&block->instr_list);

   block->comp = comp;

   return block;
}

static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = ppir_get_block(comp, nblock);

   comp->current_block = block;

   list_addtail(&block->list, &comp->block_list);

   nir_foreach_instr(instr, nblock) {
      assert(instr->type < nir_instr_type_phi);
      if (!ppir_emit_instr[instr->type](block, instr))
         return false;
   }

   return true;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);

static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
       exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &if_stmt->condition, 1);
   else_branch->num_src = 1;
   /* Negate the condition to minimize branching. We're generating the
    * following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or if the else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->then_list))
      return false;

   if (empty_else_block) {
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add the empty else block to the list */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);
   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block; will fix up later */
   list_addtail(&after_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->else_list))
      return false;

   return true;
}

static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
   ppir_block *save_loop_cont_block = comp->loop_cont_block;
   ppir_block *block;
   ppir_branch_node *loop_branch;
   nir_block *loop_last_block;
   ppir_node *node;

   comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));

   if (!ppir_emit_cf_list(comp, &nloop->body))
      return false;

   loop_last_block = nir_loop_last_block(nloop);
   block = ppir_get_block(comp, loop_last_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   loop_branch = ppir_node_to_branch(node);
   /* Unconditional */
   loop_branch->num_src = 0;
   loop_branch->target = comp->loop_cont_block;
   list_addtail(&loop_branch->node.list, &block->node_list);

   comp->loop_cont_block = save_loop_cont_block;

   comp->num_loops++;

   return true;
}

static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
{
   ppir_error("function nir_cf_node not supported\n");
   return false;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      bool ret;

      switch (node->type) {
      case nir_cf_node_block:
         ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
         break;
      case nir_cf_node_function:
         ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
         break;
      default:
         ppir_error("unknown NIR node type %d\n", node->type);
         return false;
      }

      if (!ret)
         return false;
   }

   return true;
}

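/* The var_nodes array lives in the same allocation as the compiler struct:
 * num_ssa slots for SSA defs first, then four slots (one per component) for
 * each NIR register, which is why reg_base is set to num_ssa. */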
static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
{
   ppir_compiler *comp = rzalloc_size(
      prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
   if (!comp)
      return NULL;

   list_inithead(&comp->block_list);
   list_inithead(&comp->reg_list);
   comp->reg_num = 0;
   comp->blocks = _mesa_hash_table_u64_create(prog);

   comp->var_nodes = (ppir_node **)(comp + 1);
   comp->reg_base = num_ssa;
   comp->prog = prog;

   return comp;
}

static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instruction order. Consider discard_if and the is_end node as an
    * example. If we don't add a fake dependency from discard_if to is_end,
    * the scheduler may put is_end first, and since is_end terminates the
    * shader on Utgard PP, the rest of it will never be executed.
    * Add fake dependencies for discard/branch/store to preserve
    * instruction order.
    *
    * TODO: the scheduler should schedule discard_if as early as possible,
    * otherwise we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since dependencies
    * can be scheduled in any order it can result in code like this:
    *
    * instr1: s3 = s1 < s2
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *prev_node = NULL;
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
         }
         if (node->is_out ||
             node->op == ppir_op_discard ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}

static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
                                 struct util_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb;
   ASSERTED int ret = asprintf(&shaderdb,
                               "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                               gl_shader_stage_name(info->stage),
                               comp->cur_instr_index,
                               comp->num_loops,
                               comp->num_spills,
                               comp->num_fills);
   assert(ret >= 0);

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   util_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}

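/* Walk each block backwards, remembering the most recent write to every
 * register; any read that appears earlier in the block must complete before
 * that write, so add a write-after-read dependency from the write to it. */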
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *write = NULL;
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               if (src && src->type == ppir_target_register &&
                   src->reg == reg &&
                   write) {
                  ppir_debug("Adding dep %d for write %d\n", node->index, write->index);
                  ppir_node_add_dep(write, node, ppir_dep_write_after_read);
               }
            }
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}

bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *nir,
                      struct ra_regs *ra,
                      struct util_debug_callback *debug)
{
   nir_function_impl *func = nir_shader_get_entrypoint(nir);
   ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
   if (!comp)
      return false;

   comp->ra = ra;
   comp->uses_discard = nir->info.fs.uses_discard;
   comp->dual_source_blend = nir->info.fs.color_is_dual_source;

   /* 1st pass: create ppir blocks */
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(nblock, function->impl) {
         ppir_block *block = ppir_block_create(comp);
         if (!block)
            return false;
         block->index = nblock->index;
         _mesa_hash_table_u64_insert(comp->blocks, (uintptr_t)nblock, block);
      }
   }

   /* 2nd pass: populate successors */
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(nblock, function->impl) {
         ppir_block *block = ppir_get_block(comp, nblock);
         assert(block);

         for (int i = 0; i < 2; i++) {
            if (nblock->successors[i])
               block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
         }
      }
   }

   comp->out_type_to_reg = rzalloc_size(comp, sizeof(int) * ppir_output_num);

   /* -1 means reg is not written by the shader */
   for (int i = 0; i < ppir_output_num; i++)
      comp->out_type_to_reg[i] = -1;

   foreach_list_typed(nir_register, reg, node, &func->registers) {
      ppir_reg *r = rzalloc(comp, ppir_reg);
      if (!r)
         return false;

      r->index = reg->index;
      r->num_components = reg->num_components;
      r->is_head = false;
      list_addtail(&r->list, &comp->reg_list);
      comp->reg_num++;
   }

   if (!ppir_emit_cf_list(comp, &func->body))
      goto err_out0;

   /* If we have a discard block, add it to the very end */
   if (comp->discard_block)
      list_addtail(&comp->discard_block->list, &comp->block_list);

   ppir_node_print_prog(comp);

   if (!ppir_lower_prog(comp))
      goto err_out0;

   ppir_add_ordering_deps(comp);
   ppir_add_write_after_read_deps(comp);

   ppir_node_print_prog(comp);

   if (!ppir_node_to_instr(comp))
      goto err_out0;

   if (!ppir_schedule_prog(comp))
      goto err_out0;

   if (!ppir_regalloc_prog(comp))
      goto err_out0;

   if (!ppir_codegen_prog(comp))
      goto err_out0;

   ppir_print_shader_db(nir, comp, debug);

   _mesa_hash_table_u64_destroy(comp->blocks);
   ralloc_free(comp);
   return true;

err_out0:
   _mesa_hash_table_u64_destroy(comp->blocks);
   ralloc_free(comp);
   return false;
}