• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017 Lima Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include <string.h>
26 
27 #include "util/hash_table.h"
28 #include "util/ralloc.h"
29 #include "util/bitscan.h"
30 #include "compiler/nir/nir.h"
31 #include "pipe/p_state.h"
32 
33 
34 #include "ppir.h"
35 
/* Allocate a ppir node whose destination is the given NIR SSA def.
 * Returns the node, or NULL on allocation failure. */
static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
{
   ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);
   dest->type = ppir_target_ssa;
   dest->ssa.num_components = ssa->num_components;
   dest->write_mask = u_bit_consecutive(0, ssa->num_components);

   /* NOTE(review): load/store destinations are flagged as a register
    * "head" — presumably the first slot of a register group; confirm
    * the exact semantics in ppir.h. */
   switch (node->type) {
   case ppir_node_type_load:
   case ppir_node_type_store:
      dest->ssa.is_head = true;
      break;
   default:
      break;
   }

   return node;
}
53 
/* Allocate a ppir node whose destination is the ppir_reg backing the
 * given NIR register.  Returns the node, or NULL on allocation failure. */
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_register *reg, unsigned mask)
{
   ppir_node *node = ppir_node_create(block, op, reg->index, mask);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);

   /* Find the compiler-level ppir_reg with the matching index. */
   list_for_each_entry(ppir_reg, candidate, &block->comp->reg_list, list) {
      if (candidate->index == reg->index) {
         dest->reg = candidate;
         break;
      }
   }

   dest->type = ppir_target_register;
   dest->write_mask = mask;

   /* See ppir_node_create_ssa(): loads/stores mark the register head. */
   bool is_load_store = node->type == ppir_node_type_load ||
                        node->type == ppir_node_type_store;
   if (is_load_store)
      dest->reg->is_head = true;

   return node;
}
79 
/* Dispatch node creation on the kind of NIR destination: SSA def,
 * register, or none (dest == NULL, e.g. a plain mov placeholder). */
static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
                                   nir_dest *dest, unsigned mask)
{
   if (!dest)
      return ppir_node_create(block, op, -1, 0);

   return dest->is_ssa ? ppir_node_create_ssa(block, op, &dest->ssa)
                       : ppir_node_create_reg(block, op, dest->reg.reg, mask);
}
94 
/* Resolve a NIR source to the producing ppir node, record the dependency
 * edge, and assign the target into *ps.  For register sources, every
 * component selected by mask is resolved through its swizzle. */
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      child = comp->var_nodes[ns->ssa->index];
      if (child->op != ppir_op_undef)
         ppir_node_add_dep(node, child, ppir_dep_src);
   } else {
      nir_register *reg = ns->reg.reg;
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         unsigned slot = (reg->index << 2) + comp->reg_base + swizzle;

         child = comp->var_nodes[slot];
         /* Reg is read before it was written, create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_dummy, reg,
                                         u_bit_consecutive(0, 4));
            comp->var_nodes[slot] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_dummy)
            ppir_node_add_dep(node, child, ppir_dep_src);
      }
   }

   assert(child);
   ppir_node_target_assign(ps, child);
}
125 
126 static int nir_to_ppir_opcodes[nir_num_opcodes] = {
127    [nir_op_mov] = ppir_op_mov,
128    [nir_op_fmul] = ppir_op_mul,
129    [nir_op_fabs] = ppir_op_abs,
130    [nir_op_fneg] = ppir_op_neg,
131    [nir_op_fadd] = ppir_op_add,
132    [nir_op_fsum3] = ppir_op_sum3,
133    [nir_op_fsum4] = ppir_op_sum4,
134    [nir_op_frsq] = ppir_op_rsqrt,
135    [nir_op_flog2] = ppir_op_log2,
136    [nir_op_fexp2] = ppir_op_exp2,
137    [nir_op_fsqrt] = ppir_op_sqrt,
138    [nir_op_fsin] = ppir_op_sin,
139    [nir_op_fcos] = ppir_op_cos,
140    [nir_op_fmax] = ppir_op_max,
141    [nir_op_fmin] = ppir_op_min,
142    [nir_op_frcp] = ppir_op_rcp,
143    [nir_op_ffloor] = ppir_op_floor,
144    [nir_op_fceil] = ppir_op_ceil,
145    [nir_op_ffract] = ppir_op_fract,
146    [nir_op_sge] = ppir_op_ge,
147    [nir_op_slt] = ppir_op_lt,
148    [nir_op_seq] = ppir_op_eq,
149    [nir_op_sne] = ppir_op_ne,
150    [nir_op_fcsel] = ppir_op_select,
151    [nir_op_inot] = ppir_op_not,
152    [nir_op_ftrunc] = ppir_op_trunc,
153    [nir_op_fsat] = ppir_op_sat,
154    [nir_op_fddx] = ppir_op_ddx,
155    [nir_op_fddy] = ppir_op_ddy,
156 };
157 
ppir_emit_alu(ppir_block * block,nir_instr * ni)158 static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
159 {
160    nir_alu_instr *instr = nir_instr_as_alu(ni);
161    int op = nir_to_ppir_opcodes[instr->op];
162 
163    if (op == ppir_op_unsupported) {
164       ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
165       return false;
166    }
167 
168    ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
169                                                instr->dest.write_mask);
170    if (!node)
171       return false;
172 
173    ppir_dest *pd = &node->dest;
174    nir_alu_dest *nd = &instr->dest;
175    if (nd->saturate)
176       pd->modifier = ppir_outmod_clamp_fraction;
177 
178    unsigned src_mask;
179    switch (op) {
180    case ppir_op_sum3:
181       src_mask = 0b0111;
182       break;
183    case ppir_op_sum4:
184       src_mask = 0b1111;
185       break;
186    default:
187       src_mask = pd->write_mask;
188       break;
189    }
190 
191    unsigned num_child = nir_op_infos[instr->op].num_inputs;
192    node->num_src = num_child;
193 
194    for (int i = 0; i < num_child; i++) {
195       nir_alu_src *ns = instr->src + i;
196       ppir_src *ps = node->src + i;
197       memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
198       ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);
199 
200       ps->absolute = ns->abs;
201       ps->negate = ns->negate;
202    }
203 
204    list_addtail(&node->node.list, &block->node_list);
205    return true;
206 }
207 
208 static ppir_block *ppir_block_create(ppir_compiler *comp);
209 
ppir_emit_discard_block(ppir_compiler * comp)210 static bool ppir_emit_discard_block(ppir_compiler *comp)
211 {
212    ppir_block *block = ppir_block_create(comp);
213    ppir_discard_node *discard;
214    if (!block)
215       return false;
216 
217    comp->discard_block = block;
218    block->comp  = comp;
219 
220    discard = ppir_node_create(block, ppir_op_discard, -1, 0);
221    if (discard)
222       list_addtail(&discard->node.list, &block->node_list);
223    else
224       return false;
225 
226    return true;
227 }
228 
ppir_emit_discard_if(ppir_block * block,nir_instr * ni)229 static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
230 {
231    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
232    ppir_node *node;
233    ppir_compiler *comp = block->comp;
234    ppir_branch_node *branch;
235 
236    if (!comp->discard_block && !ppir_emit_discard_block(comp))
237       return NULL;
238 
239    node = ppir_node_create(block, ppir_op_branch, -1, 0);
240    if (!node)
241       return NULL;
242    branch = ppir_node_to_branch(node);
243 
244    /* second src and condition will be updated during lowering */
245    ppir_node_add_src(block->comp, node, &branch->src[0],
246                      &instr->src[0], u_bit_consecutive(0, instr->num_components));
247    branch->num_src = 1;
248    branch->target = comp->discard_block;
249 
250    return node;
251 }
252 
ppir_emit_discard(ppir_block * block,nir_instr * ni)253 static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
254 {
255    ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);
256 
257    return node;
258 }
259 
ppir_emit_intrinsic(ppir_block * block,nir_instr * ni)260 static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
261 {
262    ppir_node *node;
263    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
264    unsigned mask = 0;
265    ppir_load_node *lnode;
266    ppir_alu_node *alu_node;
267 
268    switch (instr->intrinsic) {
269    case nir_intrinsic_load_input:
270       if (!instr->dest.is_ssa)
271          mask = u_bit_consecutive(0, instr->num_components);
272 
273       lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
274       if (!lnode)
275          return false;
276 
277       lnode->num_components = instr->num_components;
278       lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
279       if (nir_src_is_const(instr->src[0]))
280          lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
281       else {
282          lnode->num_src = 1;
283          ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
284       }
285       list_addtail(&lnode->node.list, &block->node_list);
286       return true;
287 
288    case nir_intrinsic_load_frag_coord:
289    case nir_intrinsic_load_point_coord:
290    case nir_intrinsic_load_front_face:
291       if (!instr->dest.is_ssa)
292          mask = u_bit_consecutive(0, instr->num_components);
293 
294       ppir_op op;
295       switch (instr->intrinsic) {
296       case nir_intrinsic_load_frag_coord:
297          op = ppir_op_load_fragcoord;
298          break;
299       case nir_intrinsic_load_point_coord:
300          op = ppir_op_load_pointcoord;
301          break;
302       case nir_intrinsic_load_front_face:
303          op = ppir_op_load_frontface;
304          break;
305       default:
306          unreachable("bad intrinsic");
307          break;
308       }
309 
310       lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
311       if (!lnode)
312          return false;
313 
314       lnode->num_components = instr->num_components;
315       list_addtail(&lnode->node.list, &block->node_list);
316       return true;
317 
318    case nir_intrinsic_load_uniform:
319       if (!instr->dest.is_ssa)
320          mask = u_bit_consecutive(0, instr->num_components);
321 
322       lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
323       if (!lnode)
324          return false;
325 
326       lnode->num_components = instr->num_components;
327       lnode->index = nir_intrinsic_base(instr);
328       if (nir_src_is_const(instr->src[0]))
329          lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
330       else {
331          lnode->num_src = 1;
332          ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
333       }
334 
335       list_addtail(&lnode->node.list, &block->node_list);
336       return true;
337 
338    case nir_intrinsic_store_output: {
339       /* In simple cases where the store_output is ssa, that register
340        * can be directly marked as the output.
341        * If discard is used or the source is not ssa, things can get a
342        * lot more complicated, so don't try to optimize those and fall
343        * back to inserting a mov at the end.
344        * If the source node will only be able to output to pipeline
345        * registers, fall back to the mov as well. */
346       assert(nir_src_is_const(instr->src[1]) &&
347              "lima doesn't support indirect outputs");
348 
349       nir_io_semantics io = nir_intrinsic_io_semantics(instr);
350       unsigned offset = nir_src_as_uint(instr->src[1]);
351       unsigned slot = io.location + offset;
352       ppir_output_type out_type = ppir_nir_output_to_ppir(slot,
353          block->comp->dual_source_blend ? io.dual_source_blend_index : 0);
354       if (out_type == ppir_output_invalid) {
355          ppir_debug("Unsupported output type: %d\n", slot);
356          return false;
357       }
358 
359       if (!block->comp->uses_discard && instr->src->is_ssa) {
360          node = block->comp->var_nodes[instr->src->ssa->index];
361          switch (node->op) {
362          case ppir_op_load_uniform:
363          case ppir_op_load_texture:
364          case ppir_op_const:
365             break;
366          default: {
367             ppir_dest *dest = ppir_node_get_dest(node);
368             dest->ssa.out_type = out_type;
369             node->is_out = 1;
370             return true;
371             }
372          }
373       }
374 
375       alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
376       if (!alu_node)
377          return false;
378 
379       ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
380       dest->type = ppir_target_ssa;
381       dest->ssa.num_components = instr->num_components;
382       dest->ssa.index = 0;
383       dest->write_mask = u_bit_consecutive(0, instr->num_components);
384       dest->ssa.out_type = out_type;
385 
386       alu_node->num_src = 1;
387 
388       for (int i = 0; i < instr->num_components; i++)
389          alu_node->src[0].swizzle[i] = i;
390 
391       ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
392                         u_bit_consecutive(0, instr->num_components));
393 
394       alu_node->node.is_out = 1;
395 
396       list_addtail(&alu_node->node.list, &block->node_list);
397       return true;
398    }
399 
400    case nir_intrinsic_discard:
401       node = ppir_emit_discard(block, ni);
402       list_addtail(&node->list, &block->node_list);
403       return true;
404 
405    case nir_intrinsic_discard_if:
406       node = ppir_emit_discard_if(block, ni);
407       list_addtail(&node->list, &block->node_list);
408       return true;
409 
410    default:
411       ppir_error("unsupported nir_intrinsic_instr %s\n",
412                  nir_intrinsic_infos[instr->intrinsic].name);
413       return false;
414    }
415 }
416 
ppir_emit_load_const(ppir_block * block,nir_instr * ni)417 static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni)
418 {
419    nir_load_const_instr *instr = nir_instr_as_load_const(ni);
420    ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
421    if (!node)
422       return false;
423 
424    assert(instr->def.bit_size == 32);
425 
426    for (int i = 0; i < instr->def.num_components; i++)
427       node->constant.value[i].i = instr->value[i].i32;
428    node->constant.num = instr->def.num_components;
429 
430    list_addtail(&node->node.list, &block->node_list);
431    return true;
432 }
433 
ppir_emit_ssa_undef(ppir_block * block,nir_instr * ni)434 static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
435 {
436    nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni);
437    ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
438    if (!node)
439       return false;
440    ppir_alu_node *alu = ppir_node_to_alu(node);
441 
442    ppir_dest *dest = &alu->dest;
443    dest->ssa.undef = true;
444 
445    list_addtail(&node->list, &block->node_list);
446    return true;
447 }
448 
ppir_emit_tex(ppir_block * block,nir_instr * ni)449 static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
450 {
451    nir_tex_instr *instr = nir_instr_as_tex(ni);
452    ppir_load_texture_node *node;
453 
454    switch (instr->op) {
455    case nir_texop_tex:
456    case nir_texop_txb:
457    case nir_texop_txl:
458       break;
459    default:
460       ppir_error("unsupported texop %d\n", instr->op);
461       return false;
462    }
463 
464    switch (instr->sampler_dim) {
465    case GLSL_SAMPLER_DIM_1D:
466    case GLSL_SAMPLER_DIM_2D:
467    case GLSL_SAMPLER_DIM_3D:
468    case GLSL_SAMPLER_DIM_CUBE:
469    case GLSL_SAMPLER_DIM_RECT:
470    case GLSL_SAMPLER_DIM_EXTERNAL:
471       break;
472    default:
473       ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
474       return false;
475    }
476 
477    /* emit ld_tex node */
478 
479    unsigned mask = 0;
480    if (!instr->dest.is_ssa)
481       mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));
482 
483    node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
484    if (!node)
485       return false;
486 
487    node->sampler = instr->texture_index;
488    node->sampler_dim = instr->sampler_dim;
489 
490    for (int i = 0; i < instr->coord_components; i++)
491          node->src[0].swizzle[i] = i;
492 
493    bool perspective = false;
494 
495    for (int i = 0; i < instr->num_srcs; i++) {
496       switch (instr->src[i].src_type) {
497       case nir_tex_src_backend1:
498          perspective = true;
499          FALLTHROUGH;
500       case nir_tex_src_coord: {
501          nir_src *ns = &instr->src[i].src;
502          if (ns->is_ssa) {
503             ppir_node *child = block->comp->var_nodes[ns->ssa->index];
504             if (child->op == ppir_op_load_varying) {
505                /* If the successor is load_texture, promote it to load_coords */
506                nir_tex_src *nts = (nir_tex_src *)ns;
507                if (nts->src_type == nir_tex_src_coord ||
508                    nts->src_type == nir_tex_src_backend1)
509                   child->op = ppir_op_load_coords;
510             }
511          }
512 
513          /* src[0] is not used by the ld_tex instruction but ensures
514           * correct scheduling due to the pipeline dependency */
515          ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src,
516                            u_bit_consecutive(0, instr->coord_components));
517          node->num_src++;
518          break;
519       }
520       case nir_tex_src_bias:
521       case nir_tex_src_lod:
522          node->lod_bias_en = true;
523          node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
524          ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1);
525          node->num_src++;
526          break;
527       default:
528          ppir_error("unsupported texture source type\n");
529          return false;
530       }
531    }
532 
533    list_addtail(&node->node.list, &block->node_list);
534 
535    /* validate load coords node */
536 
537    ppir_node *src_coords = ppir_node_get_src(&node->node, 0)->node;
538    ppir_load_node *load = NULL;
539 
540    if (src_coords && ppir_node_has_single_src_succ(src_coords) &&
541        (src_coords->op == ppir_op_load_coords))
542       load = ppir_node_to_load(src_coords);
543    else {
544       /* Create load_coords node */
545       load = ppir_node_create(block, ppir_op_load_coords_reg, -1, 0);
546       if (!load)
547          return false;
548       list_addtail(&load->node.list, &block->node_list);
549 
550       load->src = node->src[0];
551       load->num_src = 1;
552       load->num_components = instr->coord_components;
553 
554       ppir_debug("%s create load_coords node %d for %d\n",
555                  __FUNCTION__, load->index, node->node.index);
556 
557       ppir_node_foreach_pred_safe((&node->node), dep) {
558          ppir_node *pred = dep->pred;
559          ppir_node_remove_dep(dep);
560          ppir_node_add_dep(&load->node, pred, ppir_dep_src);
561       }
562       ppir_node_add_dep(&node->node, &load->node, ppir_dep_src);
563    }
564 
565    assert(load);
566 
567    if (perspective) {
568       if (instr->coord_components == 3)
569          load->perspective = ppir_perspective_z;
570       else
571          load->perspective = ppir_perspective_w;
572    }
573 
574    load->sampler_dim = instr->sampler_dim;
575    node->src[0].type = load->dest.type = ppir_target_pipeline;
576    node->src[0].pipeline = load->dest.pipeline = ppir_pipeline_reg_discard;
577 
578    return true;
579 }
580 
ppir_get_block(ppir_compiler * comp,nir_block * nblock)581 static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
582 {
583    ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uintptr_t)nblock);
584 
585    return block;
586 }
587 
ppir_emit_jump(ppir_block * block,nir_instr * ni)588 static bool ppir_emit_jump(ppir_block *block, nir_instr *ni)
589 {
590    ppir_node *node;
591    ppir_compiler *comp = block->comp;
592    ppir_branch_node *branch;
593    ppir_block *jump_block;
594    nir_jump_instr *jump = nir_instr_as_jump(ni);
595 
596    switch (jump->type) {
597    case nir_jump_break: {
598       assert(comp->current_block->successors[0]);
599       assert(!comp->current_block->successors[1]);
600       jump_block = comp->current_block->successors[0];
601    }
602    break;
603    case nir_jump_continue:
604       jump_block = comp->loop_cont_block;
605    break;
606    default:
607       ppir_error("nir_jump_instr not support\n");
608       return false;
609    }
610 
611    assert(jump_block != NULL);
612 
613    node = ppir_node_create(block, ppir_op_branch, -1, 0);
614    if (!node)
615       return false;
616    branch = ppir_node_to_branch(node);
617 
618    /* Unconditional */
619    branch->num_src = 0;
620    branch->target = jump_block;
621 
622    list_addtail(&node->list, &block->node_list);
623    return true;
624 }
625 
626 static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
627    [nir_instr_type_alu]        = ppir_emit_alu,
628    [nir_instr_type_intrinsic]  = ppir_emit_intrinsic,
629    [nir_instr_type_load_const] = ppir_emit_load_const,
630    [nir_instr_type_ssa_undef]  = ppir_emit_ssa_undef,
631    [nir_instr_type_tex]        = ppir_emit_tex,
632    [nir_instr_type_jump]       = ppir_emit_jump,
633 };
634 
ppir_block_create(ppir_compiler * comp)635 static ppir_block *ppir_block_create(ppir_compiler *comp)
636 {
637    ppir_block *block = rzalloc(comp, ppir_block);
638    if (!block)
639       return NULL;
640 
641    list_inithead(&block->node_list);
642    list_inithead(&block->instr_list);
643 
644    block->comp = comp;
645 
646    return block;
647 }
648 
ppir_emit_block(ppir_compiler * comp,nir_block * nblock)649 static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
650 {
651    ppir_block *block = ppir_get_block(comp, nblock);
652 
653    comp->current_block = block;
654 
655    list_addtail(&block->list, &comp->block_list);
656 
657    nir_foreach_instr(instr, nblock) {
658       assert(instr->type < nir_instr_type_phi);
659       if (!ppir_emit_instr[instr->type](block, instr))
660          return false;
661    }
662 
663    return true;
664 }
665 
666 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);
667 
ppir_emit_if(ppir_compiler * comp,nir_if * if_stmt)668 static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
669 {
670    ppir_node *node;
671    ppir_branch_node *else_branch, *after_branch;
672    nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
673    bool empty_else_block =
674       (nir_else_block == nir_if_last_else_block(if_stmt) &&
675       exec_list_is_empty(&nir_else_block->instr_list));
676    ppir_block *block = comp->current_block;
677 
678    node = ppir_node_create(block, ppir_op_branch, -1, 0);
679    if (!node)
680       return false;
681    else_branch = ppir_node_to_branch(node);
682    ppir_node_add_src(block->comp, node, &else_branch->src[0],
683                      &if_stmt->condition, 1);
684    else_branch->num_src = 1;
685    /* Negate condition to minimize branching. We're generating following:
686     * current_block: { ...; if (!statement) branch else_block; }
687     * then_block: { ...; branch after_block; }
688     * else_block: { ... }
689     * after_block: { ... }
690     *
691     * or if else list is empty:
692     * block: { if (!statement) branch else_block; }
693     * then_block: { ... }
694     * else_block: after_block: { ... }
695     */
696    else_branch->negate = true;
697    list_addtail(&else_branch->node.list, &block->node_list);
698 
699    if (!ppir_emit_cf_list(comp, &if_stmt->then_list))
700       return false;
701 
702    if (empty_else_block) {
703       nir_block *nblock = nir_if_last_else_block(if_stmt);
704       assert(nblock->successors[0]);
705       assert(!nblock->successors[1]);
706       else_branch->target = ppir_get_block(comp, nblock->successors[0]);
707       /* Add empty else block to the list */
708       list_addtail(&block->successors[1]->list, &comp->block_list);
709       return true;
710    }
711 
712    else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));
713 
714    nir_block *last_then_block = nir_if_last_then_block(if_stmt);
715    assert(last_then_block->successors[0]);
716    assert(!last_then_block->successors[1]);
717    block = ppir_get_block(comp, last_then_block);
718    node = ppir_node_create(block, ppir_op_branch, -1, 0);
719    if (!node)
720       return false;
721    after_branch = ppir_node_to_branch(node);
722    /* Unconditional */
723    after_branch->num_src = 0;
724    after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
725    /* Target should be after_block, will fixup later */
726    list_addtail(&after_branch->node.list, &block->node_list);
727 
728    if (!ppir_emit_cf_list(comp, &if_stmt->else_list))
729       return false;
730 
731    return true;
732 }
733 
ppir_emit_loop(ppir_compiler * comp,nir_loop * nloop)734 static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
735 {
736    ppir_block *save_loop_cont_block = comp->loop_cont_block;
737    ppir_block *block;
738    ppir_branch_node *loop_branch;
739    nir_block *loop_last_block;
740    ppir_node *node;
741 
742    comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));
743 
744    if (!ppir_emit_cf_list(comp, &nloop->body))
745       return false;
746 
747    loop_last_block = nir_loop_last_block(nloop);
748    block = ppir_get_block(comp, loop_last_block);
749    node = ppir_node_create(block, ppir_op_branch, -1, 0);
750    if (!node)
751       return false;
752    loop_branch = ppir_node_to_branch(node);
753    /* Unconditional */
754    loop_branch->num_src = 0;
755    loop_branch->target = comp->loop_cont_block;
756    list_addtail(&loop_branch->node.list, &block->node_list);
757 
758    comp->loop_cont_block = save_loop_cont_block;
759 
760    comp->num_loops++;
761 
762    return true;
763 }
764 
ppir_emit_function(ppir_compiler * comp,nir_function_impl * nfunc)765 static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
766 {
767    ppir_error("function nir_cf_node not support\n");
768    return false;
769 }
770 
ppir_emit_cf_list(ppir_compiler * comp,struct exec_list * list)771 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
772 {
773    foreach_list_typed(nir_cf_node, node, node, list) {
774       bool ret;
775 
776       switch (node->type) {
777       case nir_cf_node_block:
778          ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
779          break;
780       case nir_cf_node_if:
781          ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
782          break;
783       case nir_cf_node_loop:
784          ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
785          break;
786       case nir_cf_node_function:
787          ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
788          break;
789       default:
790          ppir_error("unknown NIR node type %d\n", node->type);
791          return false;
792       }
793 
794       if (!ret)
795          return false;
796    }
797 
798    return true;
799 }
800 
ppir_compiler_create(void * prog,unsigned num_reg,unsigned num_ssa)801 static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
802 {
803    ppir_compiler *comp = rzalloc_size(
804       prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
805    if (!comp)
806       return NULL;
807 
808    list_inithead(&comp->block_list);
809    list_inithead(&comp->reg_list);
810    comp->reg_num = 0;
811    comp->blocks = _mesa_hash_table_u64_create(prog);
812 
813    comp->var_nodes = (ppir_node **)(comp + 1);
814    comp->reg_base = num_ssa;
815    comp->prog = prog;
816 
817    return comp;
818 }
819 
/* Some intrinsics do not have explicit dependencies and thus depend
 * on instructions order. Consider discard_if and the is_end node as
 * example. If we don't add fake dependency of discard_if to is_end,
 * scheduler may put the is_end first and since is_end terminates
 * shader on Utgard PP, rest of it will never be executed.
 * Add fake dependencies for discard/branch/store to preserve
 * instruction order.
 *
 * TODO: scheduler should schedule discard_if as early as possible otherwise
 * we may end up with suboptimal code for cases like this:
 *
 * s3 = s1 < s2
 * discard_if s3
 * s4 = s1 + s2
 * store s4
 *
 * In this case store depends on discard_if and s4, but since dependencies can
 * be scheduled in any order it can result in code like this:
 *
 * instr1: s3 = s1 < s3
 * instr2: s4 = s1 + s2
 * instr3: discard_if s3
 * instr4: store s4
 */
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      /* Walk each block backwards, chaining root nodes (except consts)
       * to the most recently seen ordering-sensitive node below them. */
      ppir_node *anchor = NULL;

      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (anchor && ppir_node_is_root(node) && node->op != ppir_op_const)
            ppir_node_add_dep(anchor, node, ppir_dep_sequence);

         bool ordering_sensitive = node->is_out ||
                                   node->op == ppir_op_discard ||
                                   node->op == ppir_op_store_temp ||
                                   node->op == ppir_op_branch;
         if (ordering_sensitive)
            anchor = node;
      }
   }
}
861 
/* Report shader-db statistics (instruction count, loops, spills/fills)
 * via stderr (when LIMA_DEBUG_SHADERDB is set) and the debug callback.
 *
 * Fix: on asprintf() failure the buffer contents are undefined; the old
 * code only assert()ed the return value, which compiles away in release
 * builds and would then have used (and freed) an undefined pointer.
 * Bail out instead. */
static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
                                 struct util_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb = NULL;
   int ret = asprintf(&shaderdb,
                      "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                      gl_shader_stage_name(info->stage),
                      comp->cur_instr_index,
                      comp->num_loops,
                      comp->num_spills,
                      comp->num_fills);
   assert(ret >= 0);
   if (ret < 0)
      return;

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   util_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}
882 
/* For every virtual register, make each node that reads the register a
 * dependency of the closest later node that writes it, so the scheduler
 * cannot hoist a write above a preceding read (write-after-read hazard).
 */
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         /* Closest write to this register found so far while walking
          * the block backwards. */
         ppir_node *next_write = NULL;

         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            if (next_write) {
               int num_src = ppir_node_get_src_num(node);
               for (int i = 0; i < num_src; i++) {
                  ppir_src *src = ppir_node_get_src(node, i);
                  if (!src || src->type != ppir_target_register ||
                      src->reg != reg)
                     continue;
                  ppir_debug("Adding dep %d for write %d\n", node->index, next_write->index);
                  ppir_node_add_dep(next_write, node, ppir_dep_write_after_read);
               }
            }

            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               next_write = node;
         }
      }
   }
}
906 
ppir_compile_nir(struct lima_fs_compiled_shader * prog,struct nir_shader * nir,struct ra_regs * ra,struct util_debug_callback * debug)907 bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *nir,
908                       struct ra_regs *ra,
909                       struct util_debug_callback *debug)
910 {
911    nir_function_impl *func = nir_shader_get_entrypoint(nir);
912    ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
913    if (!comp)
914       return false;
915 
916    comp->ra = ra;
917    comp->uses_discard = nir->info.fs.uses_discard;
918    comp->dual_source_blend = nir->info.fs.color_is_dual_source;
919 
920    /* 1st pass: create ppir blocks */
921    nir_foreach_function(function, nir) {
922       if (!function->impl)
923          continue;
924 
925       nir_foreach_block(nblock, function->impl) {
926          ppir_block *block = ppir_block_create(comp);
927          if (!block)
928             return false;
929          block->index = nblock->index;
930          _mesa_hash_table_u64_insert(comp->blocks, (uintptr_t)nblock, block);
931       }
932    }
933 
934    /* 2nd pass: populate successors */
935    nir_foreach_function(function, nir) {
936       if (!function->impl)
937          continue;
938 
939       nir_foreach_block(nblock, function->impl) {
940          ppir_block *block = ppir_get_block(comp, nblock);
941          assert(block);
942 
943          for (int i = 0; i < 2; i++) {
944             if (nblock->successors[i])
945                block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
946          }
947       }
948    }
949 
950    comp->out_type_to_reg = rzalloc_size(comp, sizeof(int) * ppir_output_num);
951 
952    /* -1 means reg is not written by the shader */
953    for (int i = 0; i < ppir_output_num; i++)
954       comp->out_type_to_reg[i] = -1;
955 
956    foreach_list_typed(nir_register, reg, node, &func->registers) {
957       ppir_reg *r = rzalloc(comp, ppir_reg);
958       if (!r)
959          return false;
960 
961       r->index = reg->index;
962       r->num_components = reg->num_components;
963       r->is_head = false;
964       list_addtail(&r->list, &comp->reg_list);
965       comp->reg_num++;
966    }
967 
968    if (!ppir_emit_cf_list(comp, &func->body))
969       goto err_out0;
970 
971    /* If we have discard block add it to the very end */
972    if (comp->discard_block)
973       list_addtail(&comp->discard_block->list, &comp->block_list);
974 
975    ppir_node_print_prog(comp);
976 
977    if (!ppir_lower_prog(comp))
978       goto err_out0;
979 
980    ppir_add_ordering_deps(comp);
981    ppir_add_write_after_read_deps(comp);
982 
983    ppir_node_print_prog(comp);
984 
985    if (!ppir_node_to_instr(comp))
986       goto err_out0;
987 
988    if (!ppir_schedule_prog(comp))
989       goto err_out0;
990 
991    if (!ppir_regalloc_prog(comp))
992       goto err_out0;
993 
994    if (!ppir_codegen_prog(comp))
995       goto err_out0;
996 
997    ppir_print_shader_db(nir, comp, debug);
998 
999    _mesa_hash_table_u64_destroy(comp->blocks);
1000    ralloc_free(comp);
1001    return true;
1002 
1003 err_out0:
1004    _mesa_hash_table_u64_destroy(comp->blocks);
1005    ralloc_free(comp);
1006    return false;
1007 }
1008 
1009