/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include <string.h>

#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/bitscan.h"
#include "compiler/nir/nir.h"
#include "pipe/p_state.h"
#include "nir_legacy.h"

#include "ppir.h"

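/* Create a node whose destination is the given NIR SSA def. The write mask
 * covers all components; load/store node destinations additionally get the
 * is_head flag set.
 */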
static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_def *ssa)
{
   ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);
   dest->type = ppir_target_ssa;
   dest->ssa.num_components = ssa->num_components;
   dest->write_mask = u_bit_consecutive(0, ssa->num_components);

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->ssa.is_head = true;

   return node;
}

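/* Create a node that writes a NIR register. The matching ppir_reg is looked
 * up in the compiler's reg_list by the register's def index (registers are
 * declared up front in ppir_compile_nir).
 */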
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_def *def, unsigned mask)
{
   ppir_node *node = ppir_node_create(block, op, def->index, mask);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);

   list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
      if (r->index == def->index) {
         dest->reg = r;
         break;
      }
   }

   dest->type = ppir_target_register;
   dest->write_mask = mask;

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->reg->is_head = true;

   return node;
}

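/* Create a node from a nir_legacy_dest: SSA defs and registers take the two
 * paths above; a NULL dest yields a node with no destination (index -1).
 */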
static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
                                   nir_legacy_dest *dest, unsigned mask)
{
   unsigned index = -1;

   if (dest) {
      if (dest->is_ssa)
         return ppir_node_create_ssa(block, op, dest->ssa);
      else
         return ppir_node_create_reg(block, op, dest->reg.handle, mask);
   }

   return ppir_node_create(block, op, index, 0);
}

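/* Hook up one source of a node and record the dependency on the producing
 * node(s). Producers are found in comp->var_nodes: an SSA def lives at its
 * own index, while each component of a NIR register lives at
 * (reg_index << 2) + component, which is why ppir_compiler_create()
 * allocates four slots per index.
 */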
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_legacy_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      child = comp->var_nodes[ns->ssa->index];
      if (child->op != ppir_op_undef)
         ppir_node_add_dep(node, child, ppir_dep_src);
   }
   else {
      nir_reg_src *rs = &ns->reg;
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         child = comp->var_nodes[(rs->handle->index << 2) + swizzle];
         /* Reg is read before it is written, create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_dummy, rs->handle,
               u_bit_consecutive(0, 4));
            comp->var_nodes[(rs->handle->index << 2) + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_dummy)
            ppir_node_add_dep(node, child, ppir_dep_src);
      }
   }

   assert(child);
   ppir_node_target_assign(ps, child);
}

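/* Map NIR ALU opcodes to ppir opcodes. Entries not listed here stay
 * zero-initialized and map to ppir_op_unsupported, which ppir_emit_alu()
 * rejects.
 */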
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
};

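/* Translate one NIR ALU instruction into a ppir ALU node. fsat/fabs/fneg
 * instructions that nir_legacy folds into a neighboring instruction are not
 * translated; folded fabs/fneg only alias their source node so the
 * dependency chain stays intact.
 */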
static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *instr = nir_instr_as_alu(ni);
   nir_def *def = &instr->def;
   int op = nir_to_ppir_opcodes[instr->op];

   if (op == ppir_op_unsupported) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
      return false;
   }
   nir_legacy_alu_dest legacy_dest = nir_legacy_chase_alu_dest(def);

   /* Don't try to translate a folded fsat since its source won't be valid */
   if (instr->op == nir_op_fsat && nir_legacy_fsat_folds(instr))
      return true;

   /* Skip folded fabs/fneg since we do not have dead code elimination */
   if ((instr->op == nir_op_fabs || instr->op == nir_op_fneg) &&
       nir_legacy_float_mod_folds(instr)) {
      /* Record the parent node as the folded def's node to keep
       * the dependency chain */
      nir_alu_src *ns = &instr->src[0];
      ppir_node *parent = block->comp->var_nodes[ns->src.ssa->index];
      assert(parent);
      block->comp->var_nodes[def->index] = parent;
      return true;
   }

   ppir_alu_node *node = ppir_node_create_dest(block, op, &legacy_dest.dest,
                                               legacy_dest.write_mask);
   if (!node)
      return false;

   ppir_dest *pd = &node->dest;
   if (legacy_dest.fsat)
      pd->modifier = ppir_outmod_clamp_fraction;

   unsigned src_mask;
   switch (op) {
   case ppir_op_sum3:
      src_mask = 0b0111;
      break;
   case ppir_op_sum4:
      src_mask = 0b1111;
      break;
   default:
      src_mask = pd->write_mask;
      break;
   }

   unsigned num_child = nir_op_infos[instr->op].num_inputs;
   node->num_src = num_child;

   for (int i = 0; i < num_child; i++) {
      nir_legacy_alu_src ns = nir_legacy_chase_alu_src(instr->src + i, true);
      ppir_src *ps = node->src + i;
      memcpy(ps->swizzle, ns.swizzle, sizeof(ps->swizzle));
      ppir_node_add_src(block->comp, &node->node, ps, &ns.src, src_mask);

      ps->absolute = ns.fabs;
      ps->negate = ns.fneg;
   }

   list_addtail(&node->node.list, &block->node_list);
   return true;
}

static ppir_block *ppir_block_create(ppir_compiler *comp);

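/* Discards are routed through a dedicated block holding a single discard
 * node; it is appended at the very end of the shader in ppir_compile_nir()
 * and used as the branch target for discard_if.
 */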
static bool ppir_emit_discard_block(ppir_compiler *comp)
{
   ppir_block *block = ppir_block_create(comp);
   ppir_discard_node *discard;
   if (!block)
      return false;

   comp->discard_block = block;
   block->comp = comp;

   discard = ppir_node_create(block, ppir_op_discard, -1, 0);
   if (discard)
      list_addtail(&discard->node.list, &block->node_list);
   else
      return false;

   return true;
}

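/* Emit discard_if as a conditional branch into the shared discard block,
 * creating that block on first use.
 */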
static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;

   if (!comp->discard_block && !ppir_emit_discard_block(comp))
      return NULL;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* second src and condition will be updated during lowering */
   nir_legacy_src legacy_src = nir_legacy_chase_src(&instr->src[0]);
   ppir_node_add_src(block->comp, node, &branch->src[0],
                     &legacy_src, u_bit_consecutive(0, instr->num_components));
   branch->num_src = 1;
   branch->target = comp->discard_block;

   return node;
}

static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
{
   ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);

   return node;
}

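/* Translate NIR intrinsics: register plumbing (decl/load/store_reg),
 * varying/uniform/system-value loads, store_output and discards.
 */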
static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;

   switch (instr->intrinsic) {
   case nir_intrinsic_decl_reg:
   case nir_intrinsic_store_reg:
      /* Nothing to do for these */
      return true;

   case nir_intrinsic_load_reg: {
      nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def);
      lnode = ppir_node_create_dest(block, ppir_op_dummy, &legacy_dest, mask);
      return true;
   }

   case nir_intrinsic_load_input: {
      mask = u_bit_consecutive(0, instr->num_components);

      nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def);
      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &legacy_dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
      else {
         lnode->num_src = 1;
         nir_legacy_src legacy_src = nir_legacy_chase_src(instr->src);
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, &legacy_src, 1);
      }
      list_addtail(&lnode->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face: {
      mask = u_bit_consecutive(0, instr->num_components);

      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         unreachable("bad intrinsic");
         break;
      }

      nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def);
      lnode = ppir_node_create_dest(block, op, &legacy_dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      list_addtail(&lnode->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_load_uniform: {
      mask = u_bit_consecutive(0, instr->num_components);

      nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def);
      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &legacy_dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
      else {
         lnode->num_src = 1;
         nir_legacy_src legacy_src = nir_legacy_chase_src(instr->src);
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, &legacy_src, 1);
      }

      list_addtail(&lnode->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_store_output: {
      /* In simple cases where the store_output source is SSA, the node
       * producing it can be directly marked as the output.
       * If discard is used or the source is not SSA, things can get a
       * lot more complicated, so don't try to optimize those cases and
       * fall back to inserting a mov at the end.
       * If the source node can only output to pipeline registers, fall
       * back to the mov as well. */
      assert(nir_src_is_const(instr->src[1]) &&
             "lima doesn't support indirect outputs");

      nir_io_semantics io = nir_intrinsic_io_semantics(instr);
      unsigned offset = nir_src_as_uint(instr->src[1]);
      unsigned slot = io.location + offset;
      ppir_output_type out_type = ppir_nir_output_to_ppir(slot,
         block->comp->dual_source_blend ? io.dual_source_blend_index : 0);
      if (out_type == ppir_output_invalid) {
         ppir_debug("Unsupported output type: %d\n", slot);
         return false;
      }

      if (!block->comp->uses_discard) {
         node = block->comp->var_nodes[instr->src->ssa->index];
         assert(node);
         switch (node->op) {
         case ppir_op_load_uniform:
         case ppir_op_load_texture:
         case ppir_op_dummy:
         case ppir_op_const:
            break;
         default: {
            ppir_dest *dest = ppir_node_get_dest(node);
            dest->ssa.out_type = out_type;
            node->is_out = 1;
            return true;
         }
         }
      }

      alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
      if (!alu_node)
         return false;

      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = instr->num_components;
      dest->ssa.index = 0;
      dest->write_mask = u_bit_consecutive(0, instr->num_components);
      dest->ssa.out_type = out_type;

      alu_node->num_src = 1;

      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      nir_legacy_src legacy_src = nir_legacy_chase_src(instr->src);
      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, &legacy_src,
                        u_bit_consecutive(0, instr->num_components));

      alu_node->node.is_out = 1;

      list_addtail(&alu_node->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_discard:
      node = ppir_emit_discard(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   case nir_intrinsic_discard_if:
      node = ppir_emit_discard_if(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return false;
   }
}

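/* Materialize a load_const as a ppir constant node; only 32-bit values are
 * expected at this point.
 */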
static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni)
{
   nir_load_const_instr *instr = nir_instr_as_load_const(ni);
   ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
   if (!node)
      return false;

   assert(instr->def.bit_size == 32);

   for (int i = 0; i < instr->def.num_components; i++)
      node->constant.value[i].i = instr->value[i].i32;
   node->constant.num = instr->def.num_components;

   list_addtail(&node->node.list, &block->node_list);
   return true;
}

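/* Undefined SSA values become ppir_op_undef nodes with an undef dest;
 * ppir_node_add_src() skips adding dependencies on them.
 */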
static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
   nir_undef_instr *undef = nir_instr_as_undef(ni);
   ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
   if (!node)
      return false;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   dest->ssa.undef = true;

   list_addtail(&node->list, &block->node_list);
   return true;
}

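/* Translate a texture instruction. This emits the ld_tex node itself and
 * then makes sure its coordinates arrive through the pipeline register:
 * either by reusing a single-successor load_coords producer, or by
 * inserting a separate load_coords_reg node.
 */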
static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      ppir_error("unsupported texop %d\n", instr->op);
      return false;
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return false;
   }

   /* emit ld_tex node */

   unsigned mask = 0;
   mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def);
   node = ppir_node_create_dest(block, ppir_op_load_texture, &legacy_dest, mask);
   if (!node)
      return false;

   node->sampler = instr->texture_index;
   node->sampler_dim = instr->sampler_dim;

   for (int i = 0; i < instr->coord_components; i++)
      node->src[0].swizzle[i] = i;

   bool perspective = false;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_backend1:
         perspective = true;
         FALLTHROUGH;
      case nir_tex_src_coord: {
         nir_src *ns = &instr->src[i].src;
         ppir_node *child = block->comp->var_nodes[ns->ssa->index];
         if (child->op == ppir_op_load_varying) {
            /* If the successor is load_texture, promote it to load_coords */
            nir_tex_src *nts = (nir_tex_src *)ns;
            if (nts->src_type == nir_tex_src_coord ||
                nts->src_type == nir_tex_src_backend1)
               child->op = ppir_op_load_coords;
         }

         /* src[0] is not used by the ld_tex instruction but ensures
          * correct scheduling due to the pipeline dependency */
         nir_legacy_src legacy_src = nir_legacy_chase_src(&instr->src[i].src);
         ppir_node_add_src(block->comp, &node->node, &node->src[0], &legacy_src,
                           u_bit_consecutive(0, instr->coord_components));
         node->num_src++;
         break;
      }
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         node->lod_bias_en = true;
         node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
         nir_legacy_src legacy_src = nir_legacy_chase_src(&instr->src[i].src);
         ppir_node_add_src(block->comp, &node->node, &node->src[1], &legacy_src, 1);
         node->num_src++;
         break;
      default:
         ppir_error("unsupported texture source type\n");
         return false;
      }
   }

   list_addtail(&node->node.list, &block->node_list);

   /* validate load coords node */

   ppir_node *src_coords = ppir_node_get_src(&node->node, 0)->node;
   ppir_load_node *load = NULL;

   if (src_coords && ppir_node_has_single_src_succ(src_coords) &&
       (src_coords->op == ppir_op_load_coords))
      load = ppir_node_to_load(src_coords);
   else {
      /* Create load_coords node */
      load = ppir_node_create(block, ppir_op_load_coords_reg, -1, 0);
      if (!load)
         return false;
      list_addtail(&load->node.list, &block->node_list);

      load->src = node->src[0];
      load->num_src = 1;
      load->num_components = instr->coord_components;

      ppir_debug("%s create load_coords node %d for %d\n",
                 __func__, load->index, node->node.index);

      ppir_node_foreach_pred_safe((&node->node), dep) {
         ppir_node *pred = dep->pred;
         ppir_node_remove_dep(dep);
         ppir_node_add_dep(&load->node, pred, ppir_dep_src);
      }
      ppir_node_add_dep(&node->node, &load->node, ppir_dep_src);
   }

   assert(load);

   if (perspective) {
      if (instr->coord_components == 3)
         load->perspective = ppir_perspective_z;
      else
         load->perspective = ppir_perspective_w;
   }

   load->sampler_dim = instr->sampler_dim;
   node->src[0].type = load->dest.type = ppir_target_pipeline;
   node->src[0].pipeline = load->dest.pipeline = ppir_pipeline_reg_discard;

   return true;
}

static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uintptr_t)nblock);

   return block;
}

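/* NIR jumps: break branches to the loop's single successor block, continue
 * branches back to the saved loop header; other jump types are rejected.
 */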
static bool ppir_emit_jump(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;
   ppir_block *jump_block;
   nir_jump_instr *jump = nir_instr_as_jump(ni);

   switch (jump->type) {
   case nir_jump_break:
      assert(comp->current_block->successors[0]);
      assert(!comp->current_block->successors[1]);
      jump_block = comp->current_block->successors[0];
      break;
   case nir_jump_continue:
      jump_block = comp->loop_cont_block;
      break;
   default:
      ppir_error("unsupported nir_jump_instr\n");
      return false;
   }

   assert(jump_block != NULL);

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   branch = ppir_node_to_branch(node);

   /* Unconditional */
   branch->num_src = 0;
   branch->target = jump_block;

   list_addtail(&node->list, &block->node_list);
   return true;
}

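/* Instruction emitters indexed by nir_instr_type. The table deliberately
 * stops before phis: they must have been lowered to registers already (see
 * the assert in ppir_emit_block).
 */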
static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu]        = ppir_emit_alu,
   [nir_instr_type_intrinsic]  = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_undef]      = ppir_emit_ssa_undef,
   [nir_instr_type_tex]        = ppir_emit_tex,
   [nir_instr_type_jump]       = ppir_emit_jump,
};

static ppir_block *ppir_block_create(ppir_compiler *comp)
{
   ppir_block *block = rzalloc(comp, ppir_block);
   if (!block)
      return NULL;

   list_inithead(&block->node_list);
   list_inithead(&block->instr_list);

   block->comp = comp;

   return block;
}

static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = ppir_get_block(comp, nblock);

   comp->current_block = block;

   list_addtail(&block->list, &comp->block_list);

   nir_foreach_instr(instr, nblock) {
      assert(instr->type < nir_instr_type_phi);
      if (!ppir_emit_instr[instr->type](block, instr))
         return false;
   }

   return true;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);

static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
      exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   nir_legacy_src legacy_src = nir_legacy_chase_src(&if_stmt->condition);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &legacy_src, 1);
   else_branch->num_src = 1;
   /* Negate the condition to minimize branching. We're generating the
    * following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or, if the else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->then_list))
      return false;

   if (empty_else_block) {
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add the empty else block to the list */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);
   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block, will be fixed up later */
   list_addtail(&after_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->else_list))
      return false;

   return true;
}

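/* Emit a loop body and close it with an unconditional branch back to the
 * loop's first block; loop_cont_block is saved and restored to handle
 * nested loops.
 */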
static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
   assert(!nir_loop_has_continue_construct(nloop));
   ppir_block *save_loop_cont_block = comp->loop_cont_block;
   ppir_block *block;
   ppir_branch_node *loop_branch;
   nir_block *loop_last_block;
   ppir_node *node;

   comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));

   if (!ppir_emit_cf_list(comp, &nloop->body))
      return false;

   loop_last_block = nir_loop_last_block(nloop);
   block = ppir_get_block(comp, loop_last_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   loop_branch = ppir_node_to_branch(node);
   /* Unconditional */
   loop_branch->num_src = 0;
   loop_branch->target = comp->loop_cont_block;
   list_addtail(&loop_branch->node.list, &block->node_list);

   comp->loop_cont_block = save_loop_cont_block;

   comp->num_loops++;

   return true;
}

static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
{
   ppir_error("function nir_cf_node not supported\n");
   return false;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      bool ret;

      switch (node->type) {
      case nir_cf_node_block:
         ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
         break;
      case nir_cf_node_function:
         ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
         break;
      default:
         ppir_error("unknown NIR node type %d\n", node->type);
         return false;
      }

      if (!ret)
         return false;
   }

   return true;
}

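/* Allocate the compiler together with the var_nodes array. The array needs
 * num_ssa << 2 entries because each NIR register takes four per-component
 * slots (see ppir_node_add_src).
 */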
static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_ssa)
{
   ppir_compiler *comp = rzalloc_size(
      prog, sizeof(*comp) + (num_ssa << 2) * sizeof(ppir_node *));
   if (!comp)
      return NULL;

   list_inithead(&comp->block_list);
   list_inithead(&comp->reg_list);
   comp->reg_num = 0;
   comp->blocks = _mesa_hash_table_u64_create(prog);

   comp->var_nodes = (ppir_node **)(comp + 1);
   comp->prog = prog;

   return comp;
}

static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instruction order. Consider discard_if and the is_end node as
    * an example. If we don't add a fake dependency between discard_if
    * and is_end, the scheduler may put is_end first, and since is_end
    * terminates the shader on Utgard PP, the rest of it would never be
    * executed. Add fake dependencies for discard/branch/store to
    * preserve instruction order.
    *
    * TODO: the scheduler should schedule discard_if as early as possible,
    * otherwise we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since
    * dependencies can be scheduled in any order it can result in code
    * like this:
    *
    * instr1: s3 = s1 < s2
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *prev_node = NULL;
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
         }
         if (node->is_out ||
             node->op == ppir_op_discard ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}

static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
                                 struct util_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb;
   ASSERTED int ret = asprintf(&shaderdb,
                               "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                               gl_shader_stage_name(info->stage),
                               comp->cur_instr_index,
                               comp->num_loops,
                               comp->num_spills,
                               comp->num_fills);
   assert(ret >= 0);

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   util_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}

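/* For every register, walk each block backwards and make a write depend on
 * all reads that precede it in source order, so the scheduler cannot move
 * the write above a read of the old value.
 */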
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *write = NULL;
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               if (src && src->type == ppir_target_register &&
                   src->reg == reg &&
                   write) {
                  ppir_debug("Adding dep %d for write %d\n", node->index, write->index);
                  ppir_node_add_dep(write, node, ppir_dep_write_after_read);
               }
            }
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}

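/* Compile a NIR fragment shader to PP machine code: build ppir blocks and
 * their successors, emit nodes from the NIR CF list, lower them, add
 * ordering and write-after-read dependencies, group nodes into
 * instructions, schedule, allocate registers and finally run codegen.
 */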
bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *nir,
                      struct ra_regs *ra,
                      struct util_debug_callback *debug)
{
   nir_function_impl *func = nir_shader_get_entrypoint(nir);
   ppir_compiler *comp = ppir_compiler_create(prog, func->ssa_alloc);
   if (!comp)
      return false;

   comp->ra = ra;
   comp->uses_discard = nir->info.fs.uses_discard;
   comp->dual_source_blend = nir->info.fs.color_is_dual_source;

   /* 1st pass: create ppir blocks */
   nir_foreach_function_impl(impl, nir) {
      nir_foreach_block(nblock, impl) {
         ppir_block *block = ppir_block_create(comp);
         if (!block)
            return false;
         block->index = nblock->index;
         _mesa_hash_table_u64_insert(comp->blocks, (uintptr_t)nblock, block);
      }
   }

   /* 2nd pass: populate successors */
   nir_foreach_function_impl(impl, nir) {
      nir_foreach_block(nblock, impl) {
         ppir_block *block = ppir_get_block(comp, nblock);
         assert(block);

         for (int i = 0; i < 2; i++) {
            if (nblock->successors[i])
               block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
         }
      }
   }

   comp->out_type_to_reg = rzalloc_size(comp, sizeof(int) * ppir_output_num);

   /* -1 means the reg is not written by the shader */
   for (int i = 0; i < ppir_output_num; i++)
      comp->out_type_to_reg[i] = -1;

   nir_foreach_reg_decl(decl, func) {
      ppir_reg *r = rzalloc(comp, ppir_reg);
      if (!r)
         return false;

      r->index = decl->def.index;
      r->num_components = nir_intrinsic_num_components(decl);
      r->is_head = false;
      list_addtail(&r->list, &comp->reg_list);
      comp->reg_num++;
   }

   if (!ppir_emit_cf_list(comp, &func->body))
      goto err_out0;

   /* If we have a discard block, add it to the very end */
   if (comp->discard_block)
      list_addtail(&comp->discard_block->list, &comp->block_list);

   ppir_node_print_prog(comp);

   if (!ppir_lower_prog(comp))
      goto err_out0;

   ppir_add_ordering_deps(comp);
   ppir_add_write_after_read_deps(comp);

   ppir_node_print_prog(comp);

   if (!ppir_node_to_instr(comp))
      goto err_out0;

   if (!ppir_schedule_prog(comp))
      goto err_out0;

   if (!ppir_regalloc_prog(comp))
      goto err_out0;

   if (!ppir_codegen_prog(comp))
      goto err_out0;

   ppir_print_shader_db(nir, comp, debug);

   _mesa_hash_table_u64_destroy(comp->blocks);
   ralloc_free(comp);
   return true;

err_out0:
   _mesa_hash_table_u64_destroy(comp->blocks);
   ralloc_free(comp);
   return false;
}