/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include <string.h>

#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/bitscan.h"
#include "compiler/nir/nir.h"
#include "pipe/p_state.h"
#include "nir_legacy.h"

#include "ppir.h"

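/* Create a ppir node whose destination is the given NIR SSA def; the write
 * mask covers all of the def's components. For load/store nodes the def is
 * flagged as a "head", a placement hint consumed later by register
 * allocation. */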
static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_def *ssa)
{
   ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);
   dest->type = ppir_target_ssa;
   dest->ssa.num_components = ssa->num_components;
   dest->write_mask = u_bit_consecutive(0, ssa->num_components);

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->ssa.is_head = true;

   return node;
}

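/* Create a ppir node that writes a NIR register with the given write mask.
 * The matching ppir_reg is looked up by index in the compiler's reg_list,
 * which ppir_compile_nir populates from the register declarations before
 * emission starts. */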
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_def *def, unsigned mask)
{
   ppir_node *node = ppir_node_create(block, op, def->index, mask);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);

   list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
      if (r->index == def->index) {
         dest->reg = r;
         break;
      }
   }

   dest->type = ppir_target_register;
   dest->write_mask = mask;

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->reg->is_head = true;

   return node;
}

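/* Dispatch node creation on the destination kind: SSA def, register, or no
 * destination at all (dest == NULL), in which case an index of -1 is used. */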
static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
                                   nir_legacy_dest *dest, unsigned mask)
{
   unsigned index = -1;

   if (dest) {
      if (dest->is_ssa)
         return ppir_node_create_ssa(block, op, dest->ssa);
      else
         return ppir_node_create_reg(block, op, dest->reg.handle, mask);
   }

   return ppir_node_create(block, op, index, 0);
}

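/* Resolve a NIR legacy source to the ppir node(s) producing it and add the
 * corresponding dependencies. SSA sources map 1:1 through var_nodes;
 * register sources are tracked per component at (reg index * 4 + swizzle),
 * so each component read under 'mask' may come from a different writer. */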
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_legacy_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      child = comp->var_nodes[ns->ssa->index];
      if (child->op != ppir_op_undef)
         ppir_node_add_dep(node, child, ppir_dep_src);
   }
   else {
      nir_reg_src *rs = &ns->reg;
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         child = comp->var_nodes[(rs->handle->index << 2) + swizzle];
         /* Reg is read before it is written; create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_dummy, rs->handle,
                                         u_bit_consecutive(0, 4));
            comp->var_nodes[(rs->handle->index << 2) + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_dummy)
            ppir_node_add_dep(node, child, ppir_dep_src);
      }
   }

   assert(child);
   ppir_node_target_assign(ps, child);
}

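/* Map NIR ALU opcodes to ppir opcodes. Entries left out of the designated
 * initializer stay zero, which is relied on to be ppir_op_unsupported so
 * that unhandled opcodes are rejected in ppir_emit_alu below. */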
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
};

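/* Translate a NIR ALU instruction into a ppir ALU node. fsat/fabs/fneg
 * instructions that nir_legacy reports as folded into their neighbors are
 * skipped here and expressed as destination/source modifiers instead. */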
static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *instr = nir_instr_as_alu(ni);
   nir_def *def = &instr->def;
   int op = nir_to_ppir_opcodes[instr->op];

   if (op == ppir_op_unsupported) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
      return false;
   }
   nir_legacy_alu_dest legacy_dest = nir_legacy_chase_alu_dest(def);

   /* Don't try to translate a folded fsat since its source won't be valid */
   if (instr->op == nir_op_fsat && nir_legacy_fsat_folds(instr))
      return true;

   /* Skip folded fabs/fneg since we do not have dead code elimination */
   if ((instr->op == nir_op_fabs || instr->op == nir_op_fneg) &&
       nir_legacy_float_mod_folds(instr)) {
      /* Record the parent node as the node of the folded def to keep
       * the dependency chain */
      nir_alu_src *ns = &instr->src[0];
      ppir_node *parent = block->comp->var_nodes[ns->src.ssa->index];
      assert(parent);
      block->comp->var_nodes[def->index] = parent;
      return true;
   }

   ppir_alu_node *node = ppir_node_create_dest(block, op, &legacy_dest.dest,
                                               legacy_dest.write_mask);
   if (!node)
      return false;

   ppir_dest *pd = &node->dest;
   if (legacy_dest.fsat)
      pd->modifier = ppir_outmod_clamp_fraction;

   unsigned src_mask;
   switch (op) {
   case ppir_op_sum3:
      src_mask = 0b0111;
      break;
   case ppir_op_sum4:
      src_mask = 0b1111;
      break;
   default:
      src_mask = pd->write_mask;
      break;
   }

   unsigned num_child = nir_op_infos[instr->op].num_inputs;
   node->num_src = num_child;

   for (int i = 0; i < num_child; i++) {
      nir_legacy_alu_src ns = nir_legacy_chase_alu_src(instr->src + i, true);
      ppir_src *ps = node->src + i;
      memcpy(ps->swizzle, ns.swizzle, sizeof(ps->swizzle));
      ppir_node_add_src(block->comp, &node->node, ps, &ns.src, src_mask);

      ps->absolute = ns.fabs;
      ps->negate = ns.fneg;
   }

   list_addtail(&node->node.list, &block->node_list);
   return true;
}

static ppir_block *ppir_block_create(ppir_compiler *comp);

static bool ppir_emit_discard_block(ppir_compiler *comp)
{
   ppir_block *block = ppir_block_create(comp);
   ppir_discard_node *discard;
   if (!block)
      return false;

   comp->discard_block = block;
   block->comp = comp;

   discard = ppir_node_create(block, ppir_op_discard, -1, 0);
   if (discard)
      list_addtail(&discard->node.list, &block->node_list);
   else
      return false;

   return true;
}

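/* discard_if becomes a conditional branch to a shared discard block, which
 * is created on first use and appended after all other blocks at the end of
 * compilation. */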
static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;

   if (!comp->discard_block && !ppir_emit_discard_block(comp))
      return NULL;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* second src and condition will be updated during lowering */
   nir_legacy_src legacy_src = nir_legacy_chase_src(&instr->src[0]);
   ppir_node_add_src(block->comp, node, &branch->src[0],
                     &legacy_src, u_bit_consecutive(0, instr->num_components));
   branch->num_src = 1;
   branch->target = comp->discard_block;

   return node;
}

static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
{
   ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);

   return node;
}

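/* Translate a NIR intrinsic. Loads become ppir load nodes; store_output is
 * turned into an output marker on the producing node where possible, and
 * otherwise into an explicit mov that carries the output type. */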
static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;

   switch (instr->intrinsic) {
   case nir_intrinsic_decl_reg:
   case nir_intrinsic_store_reg:
      /* Nothing to do for these */
      return true;

   case nir_intrinsic_load_reg: {
      nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def);
      lnode = ppir_node_create_dest(block, ppir_op_dummy, &legacy_dest, mask);
      return true;
   }

   case nir_intrinsic_load_input: {
      mask = u_bit_consecutive(0, instr->num_components);

      nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def);
      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &legacy_dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
      else {
         lnode->num_src = 1;
         nir_legacy_src legacy_src = nir_legacy_chase_src(instr->src);
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, &legacy_src, 1);
      }
      list_addtail(&lnode->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face: {
      mask = u_bit_consecutive(0, instr->num_components);

      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         unreachable("bad intrinsic");
         break;
      }

      nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def);
      lnode = ppir_node_create_dest(block, op, &legacy_dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      list_addtail(&lnode->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_load_uniform: {
      mask = u_bit_consecutive(0, instr->num_components);

      nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def);
      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &legacy_dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
      else {
         lnode->num_src = 1;
         nir_legacy_src legacy_src = nir_legacy_chase_src(instr->src);
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, &legacy_src, 1);
      }

      list_addtail(&lnode->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_store_output: {
      /* In simple cases where the store_output source is SSA, the node
       * producing it can be directly marked as the output.
       * If discard is used or the source is not SSA, things can get a
       * lot more complicated, so don't try to optimize those and fall
       * back to inserting a mov at the end.
       * If the source node will only be able to output to pipeline
       * registers, fall back to the mov as well. */
      assert(nir_src_is_const(instr->src[1]) &&
             "lima doesn't support indirect outputs");

      nir_io_semantics io = nir_intrinsic_io_semantics(instr);
      unsigned offset = nir_src_as_uint(instr->src[1]);
      unsigned slot = io.location + offset;
      ppir_output_type out_type = ppir_nir_output_to_ppir(slot,
         block->comp->dual_source_blend ? io.dual_source_blend_index : 0);
      if (out_type == ppir_output_invalid) {
         ppir_debug("Unsupported output type: %d\n", slot);
         return false;
      }

      if (!block->comp->uses_discard) {
         node = block->comp->var_nodes[instr->src->ssa->index];
         assert(node);
         switch (node->op) {
         case ppir_op_load_uniform:
         case ppir_op_load_texture:
         case ppir_op_dummy:
         case ppir_op_const:
            break;
         default: {
            ppir_dest *dest = ppir_node_get_dest(node);
            dest->ssa.out_type = out_type;
            node->is_out = 1;
            return true;
         }
         }
      }

      alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
      if (!alu_node)
         return false;

      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = instr->num_components;
      dest->ssa.index = 0;
      dest->write_mask = u_bit_consecutive(0, instr->num_components);
      dest->ssa.out_type = out_type;

      alu_node->num_src = 1;

      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      nir_legacy_src legacy_src = nir_legacy_chase_src(instr->src);
      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, &legacy_src,
                        u_bit_consecutive(0, instr->num_components));

      alu_node->node.is_out = 1;

      list_addtail(&alu_node->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_discard:
      node = ppir_emit_discard(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   case nir_intrinsic_discard_if:
      node = ppir_emit_discard_if(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return false;
   }
}

static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni)
{
   nir_load_const_instr *instr = nir_instr_as_load_const(ni);
   ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
   if (!node)
      return false;

   assert(instr->def.bit_size == 32);

   for (int i = 0; i < instr->def.num_components; i++)
      node->constant.value[i].i = instr->value[i].i32;
   node->constant.num = instr->def.num_components;

   list_addtail(&node->node.list, &block->node_list);
   return true;
}

static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
   nir_undef_instr *undef = nir_instr_as_undef(ni);
   ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
   if (!node)
      return false;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   dest->ssa.undef = true;

   list_addtail(&node->list, &block->node_list);
   return true;
}

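/* Translate a NIR texture instruction into a ld_tex node. The coordinates
 * reach ld_tex through a pipeline register, so the coordinate producer is
 * either promoted to a load_coords node or a separate load_coords_reg node
 * is inserted in front of the ld_tex. */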
static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      ppir_error("unsupported texop %d\n", instr->op);
      return false;
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return false;
   }

   /* emit ld_tex node */

   unsigned mask = 0;
   mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def);
   node = ppir_node_create_dest(block, ppir_op_load_texture, &legacy_dest, mask);
   if (!node)
      return false;

   node->sampler = instr->texture_index;
   node->sampler_dim = instr->sampler_dim;

   for (int i = 0; i < instr->coord_components; i++)
      node->src[0].swizzle[i] = i;

   bool perspective = false;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_backend1:
         perspective = true;
         FALLTHROUGH;
      case nir_tex_src_coord: {
         nir_src *ns = &instr->src[i].src;
         ppir_node *child = block->comp->var_nodes[ns->ssa->index];
         if (child->op == ppir_op_load_varying) {
            /* If the successor is load_texture, promote it to load_coords */
            nir_tex_src *nts = (nir_tex_src *)ns;
            if (nts->src_type == nir_tex_src_coord ||
                nts->src_type == nir_tex_src_backend1)
               child->op = ppir_op_load_coords;
         }

         /* src[0] is not used by the ld_tex instruction but ensures
          * correct scheduling due to the pipeline dependency */
         nir_legacy_src legacy_src = nir_legacy_chase_src(&instr->src[i].src);
         ppir_node_add_src(block->comp, &node->node, &node->src[0], &legacy_src,
                           u_bit_consecutive(0, instr->coord_components));
         node->num_src++;
         break;
      }
      case nir_tex_src_bias:
      case nir_tex_src_lod: {
         node->lod_bias_en = true;
         node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
         nir_legacy_src legacy_src = nir_legacy_chase_src(&instr->src[i].src);
         ppir_node_add_src(block->comp, &node->node, &node->src[1], &legacy_src, 1);
         node->num_src++;
         break;
      }
      default:
         ppir_error("unsupported texture source type\n");
         return false;
      }
   }

   list_addtail(&node->node.list, &block->node_list);

   /* validate load coords node */

   ppir_node *src_coords = ppir_node_get_src(&node->node, 0)->node;
   ppir_load_node *load = NULL;

   if (src_coords && ppir_node_has_single_src_succ(src_coords) &&
       (src_coords->op == ppir_op_load_coords))
      load = ppir_node_to_load(src_coords);
   else {
      /* Create load_coords node */
      load = ppir_node_create(block, ppir_op_load_coords_reg, -1, 0);
      if (!load)
         return false;
      list_addtail(&load->node.list, &block->node_list);

      load->src = node->src[0];
      load->num_src = 1;
      load->num_components = instr->coord_components;

      ppir_debug("%s create load_coords node %d for %d\n",
                 __func__, load->index, node->node.index);

      ppir_node_foreach_pred_safe((&node->node), dep) {
         ppir_node *pred = dep->pred;
         ppir_node_remove_dep(dep);
         ppir_node_add_dep(&load->node, pred, ppir_dep_src);
      }
      ppir_node_add_dep(&node->node, &load->node, ppir_dep_src);
   }

   assert(load);

   if (perspective) {
      if (instr->coord_components == 3)
         load->perspective = ppir_perspective_z;
      else
         load->perspective = ppir_perspective_w;
   }

   load->sampler_dim = instr->sampler_dim;
   node->src[0].type = load->dest.type = ppir_target_pipeline;
   node->src[0].pipeline = load->dest.pipeline = ppir_pipeline_reg_discard;

   return true;
}

static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uintptr_t)nblock);

   return block;
}

static bool ppir_emit_jump(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;
   ppir_block *jump_block;
   nir_jump_instr *jump = nir_instr_as_jump(ni);

   switch (jump->type) {
   case nir_jump_break: {
      assert(comp->current_block->successors[0]);
      assert(!comp->current_block->successors[1]);
      jump_block = comp->current_block->successors[0];
   }
   break;
   case nir_jump_continue:
      jump_block = comp->loop_cont_block;
      break;
   default:
      ppir_error("nir_jump_instr not supported\n");
      return false;
   }

   assert(jump_block != NULL);

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   branch = ppir_node_to_branch(node);

   /* Unconditional */
   branch->num_src = 0;
   branch->target = jump_block;

   list_addtail(&node->list, &block->node_list);
   return true;
}

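/* Per-instruction-type emit dispatch table. It is deliberately sized to
 * nir_instr_type_phi: phis are expected to have been lowered to registers
 * before this pass runs, which ppir_emit_block asserts. */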
static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu] = ppir_emit_alu,
   [nir_instr_type_intrinsic] = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_undef] = ppir_emit_ssa_undef,
   [nir_instr_type_tex] = ppir_emit_tex,
   [nir_instr_type_jump] = ppir_emit_jump,
};

static ppir_block *ppir_block_create(ppir_compiler *comp)
{
   ppir_block *block = rzalloc(comp, ppir_block);
   if (!block)
      return NULL;

   list_inithead(&block->node_list);
   list_inithead(&block->instr_list);

   block->comp = comp;

   return block;
}

static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = ppir_get_block(comp, nblock);

   comp->current_block = block;

   list_addtail(&block->list, &comp->block_list);

   nir_foreach_instr(instr, nblock) {
      assert(instr->type < nir_instr_type_phi);
      if (!ppir_emit_instr[instr->type](block, instr))
         return false;
   }

   return true;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);

static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
       exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   nir_legacy_src legacy_src = nir_legacy_chase_src(&if_stmt->condition);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &legacy_src, 1);
   else_branch->num_src = 1;
   /* Negate the condition to minimize branching. We're generating the
    * following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or if the else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->then_list))
      return false;

   if (empty_else_block) {
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add the empty else block to the list */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);
   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block; will be fixed up later */
   list_addtail(&after_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->else_list))
      return false;

   return true;
}

static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
   assert(!nir_loop_has_continue_construct(nloop));
   ppir_block *save_loop_cont_block = comp->loop_cont_block;
   ppir_block *block;
   ppir_branch_node *loop_branch;
   nir_block *loop_last_block;
   ppir_node *node;

   comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));

   if (!ppir_emit_cf_list(comp, &nloop->body))
      return false;

   loop_last_block = nir_loop_last_block(nloop);
   block = ppir_get_block(comp, loop_last_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   loop_branch = ppir_node_to_branch(node);
   /* Unconditional */
   loop_branch->num_src = 0;
   loop_branch->target = comp->loop_cont_block;
   list_addtail(&loop_branch->node.list, &block->node_list);

   comp->loop_cont_block = save_loop_cont_block;

   comp->num_loops++;

   return true;
}

static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
{
   ppir_error("function nir_cf_node not supported\n");
   return false;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      bool ret;

      switch (node->type) {
      case nir_cf_node_block:
         ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
         break;
      case nir_cf_node_function:
         ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
         break;
      default:
         ppir_error("unknown NIR node type %d\n", node->type);
         return false;
      }

      if (!ret)
         return false;
   }

   return true;
}

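/* Allocate the compiler context together with the var_nodes array. Four
 * node slots are reserved per def index so that register defs, which share
 * the index space with SSA defs, can track a separate producer node for
 * each component (see ppir_node_add_src). */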
static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_ssa)
{
   ppir_compiler *comp = rzalloc_size(
      prog, sizeof(*comp) + (num_ssa << 2) * sizeof(ppir_node *));
   if (!comp)
      return NULL;

   list_inithead(&comp->block_list);
   list_inithead(&comp->reg_list);
   comp->reg_num = 0;
   comp->blocks = _mesa_hash_table_u64_create(prog);

   comp->var_nodes = (ppir_node **)(comp + 1);
   comp->prog = prog;

   return comp;
}

static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instruction order. Consider discard_if and the is_end node as
    * an example. If we don't add a fake dependency from discard_if to
    * is_end, the scheduler may put is_end first, and since is_end
    * terminates the shader on Utgard PP, the rest of it would never be
    * executed. Add fake dependencies for discard/branch/store to
    * preserve instruction order.
    *
    * TODO: the scheduler should schedule discard_if as early as possible,
    * otherwise we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since dependencies
    * can be scheduled in any order it can result in code like this:
    *
    * instr1: s3 = s1 < s2
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *prev_node = NULL;
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
         }
         if (node->is_out ||
             node->op == ppir_op_discard ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}


static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
                                 struct util_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb;
   ASSERTED int ret = asprintf(&shaderdb,
                               "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                               gl_shader_stage_name(info->stage),
                               comp->cur_instr_index,
                               comp->num_loops,
                               comp->num_spills,
                               comp->num_fills);
   assert(ret >= 0);

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   util_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}

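/* For each register, walk every block backwards and make the most recent
 * write depend on each read that precedes it, so that scheduling cannot
 * move a write above a read of the old value (write-after-read hazard). */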
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *write = NULL;
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               if (src && src->type == ppir_target_register &&
                   src->reg == reg &&
                   write) {
                  ppir_debug("Adding dep %d for write %d\n", node->index, write->index);
                  ppir_node_add_dep(write, node, ppir_dep_write_after_read);
               }
            }
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}

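/* Compiler entry point: translate the NIR shader into ppir, then run the
 * ppir backend passes in order: lowering, dependency fixups, node-to-
 * instruction grouping, scheduling, register allocation and codegen. */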
bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *nir,
                      struct ra_regs *ra,
                      struct util_debug_callback *debug)
{
   nir_function_impl *func = nir_shader_get_entrypoint(nir);
   ppir_compiler *comp = ppir_compiler_create(prog, func->ssa_alloc);
   if (!comp)
      return false;

   comp->ra = ra;
   comp->uses_discard = nir->info.fs.uses_discard;
   comp->dual_source_blend = nir->info.fs.color_is_dual_source;

   /* 1st pass: create ppir blocks */
   nir_foreach_function_impl(impl, nir) {
      nir_foreach_block(nblock, impl) {
         ppir_block *block = ppir_block_create(comp);
         if (!block)
            return false;
         block->index = nblock->index;
         _mesa_hash_table_u64_insert(comp->blocks, (uintptr_t)nblock, block);
      }
   }

   /* 2nd pass: populate successors */
   nir_foreach_function_impl(impl, nir) {
      nir_foreach_block(nblock, impl) {
         ppir_block *block = ppir_get_block(comp, nblock);
         assert(block);

         for (int i = 0; i < 2; i++) {
            if (nblock->successors[i])
               block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
         }
      }
   }

   comp->out_type_to_reg = rzalloc_size(comp, sizeof(int) * ppir_output_num);

   /* -1 means reg is not written by the shader */
   for (int i = 0; i < ppir_output_num; i++)
      comp->out_type_to_reg[i] = -1;

   nir_foreach_reg_decl(decl, func) {
      ppir_reg *r = rzalloc(comp, ppir_reg);
      if (!r)
         return false;

      r->index = decl->def.index;
      r->num_components = nir_intrinsic_num_components(decl);
      r->is_head = false;
      list_addtail(&r->list, &comp->reg_list);
      comp->reg_num++;
   }

   if (!ppir_emit_cf_list(comp, &func->body))
      goto err_out0;

   /* If we have a discard block, add it at the very end */
   if (comp->discard_block)
      list_addtail(&comp->discard_block->list, &comp->block_list);

   ppir_node_print_prog(comp);

   if (!ppir_lower_prog(comp))
      goto err_out0;

   ppir_add_ordering_deps(comp);
   ppir_add_write_after_read_deps(comp);

   ppir_node_print_prog(comp);

   if (!ppir_node_to_instr(comp))
      goto err_out0;

   if (!ppir_schedule_prog(comp))
      goto err_out0;

   if (!ppir_regalloc_prog(comp))
      goto err_out0;

   if (!ppir_codegen_prog(comp))
      goto err_out0;

   ppir_print_shader_db(nir, comp, debug);

   _mesa_hash_table_u64_destroy(comp->blocks);
   ralloc_free(comp);
   return true;

err_out0:
   _mesa_hash_table_u64_destroy(comp->blocks);
   ralloc_free(comp);
   return false;
}