1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/ralloc.h"
26 #include "compiler/nir/nir.h"
27 #include "pipe/p_state.h"
28
29
30 #include "gpir.h"
31 #include "lima_context.h"
32
gpir_create_reg(gpir_compiler * comp)33 gpir_reg *gpir_create_reg(gpir_compiler *comp)
34 {
35 gpir_reg *reg = ralloc(comp, gpir_reg);
36 reg->index = comp->cur_reg++;
37 list_addtail(®->list, &comp->reg_list);
38 return reg;
39 }
40
reg_for_nir_reg(gpir_compiler * comp,nir_register * nir_reg)41 static gpir_reg *reg_for_nir_reg(gpir_compiler *comp, nir_register *nir_reg)
42 {
43 unsigned index = nir_reg->index;
44 gpir_reg *reg = comp->reg_for_reg[index];
45 if (reg)
46 return reg;
47 reg = gpir_create_reg(comp);
48 comp->reg_for_reg[index] = reg;
49 return reg;
50 }
51
/* Record `node` as the producer of the NIR SSA value `ssa` and name the node
 * "ssa<index>".
 *
 * When the value is consumed outside the block that defines it, a
 * gpir_op_store_reg node writing a freshly created register is appended to
 * the block and the register is remembered in comp->reg_for_ssa.
 */
static void register_node_ssa(gpir_block *block, gpir_node *node, nir_ssa_def *ssa)
{
   block->comp->node_for_ssa[ssa->index] = node;
   snprintf(node->name, sizeof(node->name), "ssa%d", ssa->index);

   bool escapes_block = false;

   /* An instruction in a different NIR block reads the value. */
   nir_foreach_use(use, ssa) {
      if (use->parent_instr->block != ssa->parent_instr->block) {
         escapes_block = true;
         break;
      }
   }

   /* An if-condition use only counts as local when the if directly follows
    * the defining block.
    */
   if (!escapes_block) {
      nir_foreach_if_use(use, ssa) {
         if (nir_cf_node_prev(&use->parent_if->cf_node) !=
             &ssa->parent_instr->block->cf_node) {
            escapes_block = true;
            break;
         }
      }
   }

   if (!escapes_block)
      return;

   /* Spill the value to a register so it can be loaded from other blocks. */
   gpir_store_node *store = gpir_node_create(block, gpir_op_store_reg);
   gpir_reg *reg = gpir_create_reg(block->comp);

   store->child = node;
   store->reg = reg;
   gpir_node_add_dep(&store->node, node, GPIR_DEP_INPUT);
   list_addtail(&store->node.list, &block->node_list);

   block->comp->reg_for_ssa[ssa->index] = reg;
}
87
/* Record `node` as the latest producer of the given NIR register destination
 * and name the node "reg<index>".
 *
 * A gpir_op_store_reg node writing the matching gpir_reg is always appended
 * to the block.
 */
static void register_node_reg(gpir_block *block, gpir_node *node, nir_reg_dest *nir_reg)
{
   unsigned index = nir_reg->reg->index;

   block->comp->node_for_reg[index] = node;
   gpir_store_node *store = gpir_node_create(block, gpir_op_store_reg);

   snprintf(node->name, sizeof(node->name), "reg%d", index);

   store->reg = reg_for_nir_reg(block->comp, nir_reg->reg);
   store->child = node;
   gpir_node_add_dep(&store->node, node, GPIR_DEP_INPUT);

   list_addtail(&store->node.list, &block->node_list);
}
101
102 /* Register the given gpir_node as providing the given NIR destination, so
103 * that gpir_node_find() will return it. Also insert any stores necessary if
104 * the destination will be used after the end of this basic block. The node
105 * must already be inserted.
106 */
register_node(gpir_block * block,gpir_node * node,nir_dest * dest)107 static void register_node(gpir_block *block, gpir_node *node, nir_dest *dest)
108 {
109 if (dest->is_ssa)
110 register_node_ssa(block, node, &dest->ssa);
111 else
112 register_node_reg(block, node, &dest->reg);
113 }
114
gpir_node_find(gpir_block * block,nir_src * src,int channel)115 static gpir_node *gpir_node_find(gpir_block *block, nir_src *src,
116 int channel)
117 {
118 gpir_reg *reg = NULL;
119 gpir_node *pred = NULL;
120 if (src->is_ssa) {
121 if (src->ssa->num_components > 1) {
122 for (int i = 0; i < GPIR_VECTOR_SSA_NUM; i++) {
123 if (block->comp->vector_ssa[i].ssa == src->ssa->index) {
124 return block->comp->vector_ssa[i].nodes[channel];
125 }
126 }
127 } else {
128 gpir_node *pred = block->comp->node_for_ssa[src->ssa->index];
129 if (pred->block == block)
130 return pred;
131 reg = block->comp->reg_for_ssa[src->ssa->index];
132 }
133 } else {
134 pred = block->comp->node_for_reg[src->reg.reg->index];
135 if (pred && pred->block == block)
136 return pred;
137 reg = reg_for_nir_reg(block->comp, src->reg.reg);
138 }
139
140 assert(reg);
141 pred = gpir_node_create(block, gpir_op_load_reg);
142 gpir_load_node *load = gpir_node_to_load(pred);
143 load->reg = reg;
144 list_addtail(&pred->list, &block->node_list);
145
146 return pred;
147 }
148
149 static int nir_to_gpir_opcodes[nir_num_opcodes] = {
150 [nir_op_fmul] = gpir_op_mul,
151 [nir_op_fadd] = gpir_op_add,
152 [nir_op_fneg] = gpir_op_neg,
153 [nir_op_fmin] = gpir_op_min,
154 [nir_op_fmax] = gpir_op_max,
155 [nir_op_frcp] = gpir_op_rcp,
156 [nir_op_frsq] = gpir_op_rsqrt,
157 [nir_op_fexp2] = gpir_op_exp2,
158 [nir_op_flog2] = gpir_op_log2,
159 [nir_op_slt] = gpir_op_lt,
160 [nir_op_sge] = gpir_op_ge,
161 [nir_op_fcsel] = gpir_op_select,
162 [nir_op_ffloor] = gpir_op_floor,
163 [nir_op_fsign] = gpir_op_sign,
164 [nir_op_seq] = gpir_op_eq,
165 [nir_op_sne] = gpir_op_ne,
166 [nir_op_fabs] = gpir_op_abs,
167 };
168
/* Translate one NIR ALU instruction into a gpir ALU node.
 *
 * Returns false when the opcode has no gpir equivalent or when node creation
 * fails, true otherwise.
 */
static bool gpir_emit_alu(gpir_block *block, nir_instr *ni)
{
   nir_alu_instr *alu = nir_instr_as_alu(ni);

   /* gpir_op_mov is useless before the final scheduler, and the scheduler
    * currently doesn't expect us to emit it. Instead, register this
    * instruction's destination against its source node. That also emits any
    * register loads/stores needed for things like "r0 = mov ssa_0" or
    * "ssa_0 = mov r0".
    */
   if (alu->op == nir_op_mov) {
      nir_alu_src *mov_src = &alu->src[0];
      gpir_node *source = gpir_node_find(block, &mov_src->src,
                                         mov_src->swizzle[0]);
      register_node(block, source, &alu->dest.dest);
      return true;
   }

   int gpir_opcode = nir_to_gpir_opcodes[alu->op];
   if (gpir_opcode == gpir_op_unsupported) {
      gpir_error("unsupported nir_op: %s\n", nir_op_infos[alu->op].name);
      return false;
   }

   gpir_alu_node *node = gpir_node_create(block, gpir_opcode);
   if (unlikely(!node))
      return false;

   unsigned num_child = nir_op_infos[alu->op].num_inputs;
   assert(num_child <= ARRAY_SIZE(node->children));
   node->num_child = num_child;

   /* Resolve every operand, using the first swizzle component of each. */
   for (unsigned i = 0; i < num_child; i++) {
      nir_alu_src *operand = &alu->src[i];
      node->children_negate[i] = operand->negate;

      gpir_node *child = gpir_node_find(block, &operand->src,
                                        operand->swizzle[0]);
      node->children[i] = child;

      gpir_node_add_dep(&node->node, child, GPIR_DEP_INPUT);
   }

   list_addtail(&node->node.list, &block->node_list);
   register_node(block, &node->node, &alu->dest.dest);

   return true;
}
216
gpir_create_load(gpir_block * block,nir_dest * dest,int op,int index,int component)217 static gpir_node *gpir_create_load(gpir_block *block, nir_dest *dest,
218 int op, int index, int component)
219 {
220 gpir_load_node *load = gpir_node_create(block, op);
221 if (unlikely(!load))
222 return NULL;
223
224 load->index = index;
225 load->component = component;
226 list_addtail(&load->node.list, &block->node_list);
227 register_node(block, &load->node, dest);
228 return &load->node;
229 }
230
/* Emit one uniform load per component of a vector SSA destination and record
 * the nodes in comp->vector_ssa[index] so gpir_node_find() can resolve the
 * individual channels.
 *
 * The loads read uniform slot (comp->constant_base + index). Each node is
 * named "ssa<N>.<c>" with <c> taken from "xyzw". Returns false when a load
 * cannot be created.
 */
static bool gpir_create_vector_load(gpir_block *block, nir_dest *dest, int index)
{
   static const char channel_names[] = "xyzw";

   assert(dest->is_ssa);
   assert(index < GPIR_VECTOR_SSA_NUM);

   block->comp->vector_ssa[index].ssa = dest->ssa.index;

   int chan = 0;
   while (chan < dest->ssa.num_components) {
      gpir_node *load = gpir_create_load(block, dest, gpir_op_load_uniform,
                                         block->comp->constant_base + index,
                                         chan);
      if (load == NULL)
         return false;

      block->comp->vector_ssa[index].nodes[chan] = load;
      snprintf(load->name, sizeof(load->name), "ssa%d.%c", dest->ssa.index,
               channel_names[chan]);
      chan++;
   }

   return true;
}
250
gpir_emit_intrinsic(gpir_block * block,nir_instr * ni)251 static bool gpir_emit_intrinsic(gpir_block *block, nir_instr *ni)
252 {
253 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
254
255 switch (instr->intrinsic) {
256 case nir_intrinsic_load_input:
257 return gpir_create_load(block, &instr->dest,
258 gpir_op_load_attribute,
259 nir_intrinsic_base(instr),
260 nir_intrinsic_component(instr)) != NULL;
261 case nir_intrinsic_load_uniform:
262 {
263 int offset = nir_intrinsic_base(instr);
264 offset += (int)nir_src_as_float(instr->src[0]);
265
266 return gpir_create_load(block, &instr->dest,
267 gpir_op_load_uniform,
268 offset / 4, offset % 4) != NULL;
269 }
270 case nir_intrinsic_load_viewport_scale:
271 return gpir_create_vector_load(block, &instr->dest, GPIR_VECTOR_SSA_VIEWPORT_SCALE);
272 case nir_intrinsic_load_viewport_offset:
273 return gpir_create_vector_load(block, &instr->dest, GPIR_VECTOR_SSA_VIEWPORT_OFFSET);
274 case nir_intrinsic_store_output:
275 {
276 gpir_store_node *store = gpir_node_create(block, gpir_op_store_varying);
277 if (unlikely(!store))
278 return false;
279 gpir_node *child = gpir_node_find(block, instr->src, 0);
280 store->child = child;
281 store->index = nir_intrinsic_base(instr);
282 store->component = nir_intrinsic_component(instr);
283
284 gpir_node_add_dep(&store->node, child, GPIR_DEP_INPUT);
285 list_addtail(&store->node.list, &block->node_list);
286
287 return true;
288 }
289 default:
290 gpir_error("unsupported nir_intrinsic_instr %s\n",
291 nir_intrinsic_infos[instr->intrinsic].name);
292 return false;
293 }
294 }
295
/* Translate a NIR load_const into a gpir_op_const node.
 *
 * Only single-component 32-bit constants are expected (asserted). Returns
 * false when the node cannot be created.
 */
static bool gpir_emit_load_const(gpir_block *block, nir_instr *ni)
{
   nir_load_const_instr *load_const = nir_instr_as_load_const(ni);

   gpir_const_node *konst = gpir_node_create(block, gpir_op_const);
   if (unlikely(konst == NULL))
      return false;

   assert(load_const->def.bit_size == 32);
   assert(load_const->def.num_components == 1);

   /* Copy the raw 32-bit pattern of the first component. */
   konst->value.i = load_const->value[0].i32;

   list_addtail(&konst->node.list, &block->node_list);
   register_node_ssa(block, &konst->node, &load_const->def);

   return true;
}
312
/* Handler for nir_ssa_undef instructions: they are not supported, so report
 * an error and fail. Neither argument is used.
 */
static bool gpir_emit_ssa_undef(gpir_block *block, nir_instr *ni)
{
   (void)block;
   (void)ni;

   gpir_error("nir_ssa_undef_instr is not supported\n");
   return false;
}
318
/* Handler for NIR texture instructions: they are not supported, so report an
 * error and fail. Neither argument is used.
 */
static bool gpir_emit_tex(gpir_block *block, nir_instr *ni)
{
   (void)block;
   (void)ni;

   gpir_error("texture operations are not supported\n");
   return false;
}
324
/* Handler for NIR jump instructions. Nothing is emitted here because jumps
 * are emitted at the end of the basic block. Always succeeds; neither
 * argument is used.
 */
static bool gpir_emit_jump(gpir_block *block, nir_instr *ni)
{
   (void)block;
   (void)ni;

   return true;
}
330
331 static bool (*gpir_emit_instr[nir_instr_type_phi])(gpir_block *, nir_instr *) = {
332 [nir_instr_type_alu] = gpir_emit_alu,
333 [nir_instr_type_intrinsic] = gpir_emit_intrinsic,
334 [nir_instr_type_load_const] = gpir_emit_load_const,
335 [nir_instr_type_ssa_undef] = gpir_emit_ssa_undef,
336 [nir_instr_type_tex] = gpir_emit_tex,
337 [nir_instr_type_jump] = gpir_emit_jump,
338 };
339
/* Translate a NIR function implementation into gpir blocks and nodes.
 *
 * A gpir_block is first created for every NIR block (stored in comp->blocks
 * by NIR block index and appended to comp->block_list). Then each block's
 * instructions are emitted through the gpir_emit_instr table and the
 * block's successors and terminating branch nodes are set up.
 *
 * Returns false on allocation failure, on an instruction type without an
 * emit handler, or when any handler fails.
 */
static bool gpir_emit_function(gpir_compiler *comp, nir_function_impl *impl)
{
   nir_index_blocks(impl);
   comp->blocks = ralloc_array(comp, gpir_block *, impl->num_blocks);
   if (!comp->blocks)
      return false;

   nir_foreach_block(block_nir, impl) {
      gpir_block *block = ralloc(comp, gpir_block);
      if (!block)
         return false;

      list_inithead(&block->node_list);
      list_inithead(&block->instr_list);

      list_addtail(&block->list, &comp->block_list);
      block->comp = comp;
      comp->blocks[block_nir->index] = block;
   }

   nir_foreach_block(block_nir, impl) {
      gpir_block *block = comp->blocks[block_nir->index];
      nir_foreach_instr(instr, block_nir) {
         assert(instr->type < nir_instr_type_phi);

         /* The assert vanishes in release builds and the table has NULL
          * slots for instruction types without a handler, so reject both
          * cases explicitly instead of reading out of bounds or calling a
          * NULL function pointer.
          */
         if ((unsigned)instr->type >= ARRAY_SIZE(gpir_emit_instr) ||
             !gpir_emit_instr[instr->type]) {
            gpir_error("unsupported nir_instr type %d\n", (int)instr->type);
            return false;
         }

         if (!gpir_emit_instr[instr->type](block, instr))
            return false;
      }

      /* A successor equal to the function's end block is recorded as NULL. */
      if (block_nir->successors[0] == impl->end_block)
         block->successors[0] = NULL;
      else
         block->successors[0] = comp->blocks[block_nir->successors[0]->index];
      block->successors[1] = NULL;

      if (block_nir->successors[1] != NULL) {
         /* Two successors: the block is followed by an if. Branch to
          * successors[1] on the negated condition; successors[0] must be
          * the next block in index order (asserted below).
          */
         nir_if *nif = nir_cf_node_as_if(nir_cf_node_next(&block_nir->cf_node));
         gpir_alu_node *cond = gpir_node_create(block, gpir_op_not);
         if (unlikely(!cond))
            return false;
         cond->children[0] = gpir_node_find(block, &nif->condition, 0);

         gpir_node_add_dep(&cond->node, cond->children[0], GPIR_DEP_INPUT);
         list_addtail(&cond->node.list, &block->node_list);

         gpir_branch_node *branch = gpir_node_create(block, gpir_op_branch_cond);
         if (unlikely(!branch))
            return false;
         list_addtail(&branch->node.list, &block->node_list);

         branch->dest = comp->blocks[block_nir->successors[1]->index];
         block->successors[1] = branch->dest;

         branch->cond = &cond->node;
         gpir_node_add_dep(&branch->node, &cond->node, GPIR_DEP_INPUT);

         assert(block_nir->successors[0]->index == block_nir->index + 1);
      } else if (block_nir->successors[0]->index != block_nir->index + 1) {
         /* Single successor that is not the next block in index order:
          * an explicit unconditional branch is required.
          */
         gpir_branch_node *branch = gpir_node_create(block, gpir_op_branch_uncond);
         if (unlikely(!branch))
            return false;
         list_addtail(&branch->node.list, &block->node_list);

         branch->dest = comp->blocks[block_nir->successors[0]->index];
      }
   }

   return true;
}
400
gpir_compiler_create(void * prog,unsigned num_reg,unsigned num_ssa)401 static gpir_compiler *gpir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
402 {
403 gpir_compiler *comp = rzalloc(prog, gpir_compiler);
404
405 list_inithead(&comp->block_list);
406 list_inithead(&comp->reg_list);
407
408 for (int i = 0; i < GPIR_VECTOR_SSA_NUM; i++)
409 comp->vector_ssa[i].ssa = -1;
410
411 comp->node_for_ssa = rzalloc_array(comp, gpir_node *, num_ssa);
412 comp->node_for_reg = rzalloc_array(comp, gpir_node *, num_reg);
413 comp->reg_for_ssa = rzalloc_array(comp, gpir_reg *, num_ssa);
414 comp->reg_for_reg = rzalloc_array(comp, gpir_reg *, num_reg);
415 comp->prog = prog;
416 return comp;
417 }
418
gpir_glsl_type_size(enum glsl_base_type type)419 static int gpir_glsl_type_size(enum glsl_base_type type)
420 {
421 /* only support GLSL_TYPE_FLOAT */
422 assert(type == GLSL_TYPE_FLOAT);
423 return 4;
424 }
425
/* Report shader statistics (instruction, loop, spill and fill counts taken
 * from `comp`) for shader-db.
 *
 * The line is written to stderr when LIMA_DEBUG_SHADERDB is set in
 * lima_debug and is always forwarded to the debug callback as a SHADER_INFO
 * message. If the message cannot be formatted, nothing is reported.
 */
static void gpir_print_shader_db(struct nir_shader *nir, gpir_compiler *comp,
                                 struct util_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb = NULL;
   int ret = asprintf(&shaderdb,
                      "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                      gl_shader_stage_name(info->stage),
                      comp->num_instr,
                      comp->num_loops,
                      comp->num_spills,
                      comp->num_fills);

   /* On failure the contents of shaderdb are unspecified, so it must not be
    * printed or freed. An assert alone would not protect release builds.
    */
   if (ret < 0)
      return;

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   util_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}
446
gpir_compile_nir(struct lima_vs_compiled_shader * prog,struct nir_shader * nir,struct util_debug_callback * debug)447 bool gpir_compile_nir(struct lima_vs_compiled_shader *prog, struct nir_shader *nir,
448 struct util_debug_callback *debug)
449 {
450 nir_function_impl *func = nir_shader_get_entrypoint(nir);
451 gpir_compiler *comp = gpir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
452 if (!comp)
453 return false;
454
455 comp->constant_base = nir->num_uniforms;
456 prog->state.uniform_size = nir->num_uniforms * 16;
457 prog->state.gl_pos_idx = 0;
458 prog->state.point_size_idx = -1;
459
460 if (!gpir_emit_function(comp, func))
461 goto err_out0;
462
463 gpir_node_print_prog_seq(comp);
464 gpir_node_print_prog_dep(comp);
465
466 /* increase for viewport uniforms */
467 comp->constant_base += GPIR_VECTOR_SSA_NUM;
468
469 if (!gpir_optimize(comp))
470 goto err_out0;
471
472 if (!gpir_pre_rsched_lower_prog(comp))
473 goto err_out0;
474
475 if (!gpir_reduce_reg_pressure_schedule_prog(comp))
476 goto err_out0;
477
478 if (!gpir_regalloc_prog(comp))
479 goto err_out0;
480
481 if (!gpir_schedule_prog(comp))
482 goto err_out0;
483
484 if (!gpir_codegen_prog(comp))
485 goto err_out0;
486
487 /* initialize to support accumulating below */
488 nir_foreach_shader_out_variable(var, nir) {
489 struct lima_varying_info *v = prog->state.varying + var->data.driver_location;
490 v->components = 0;
491 }
492
493 nir_foreach_shader_out_variable(var, nir) {
494 bool varying = true;
495 switch (var->data.location) {
496 case VARYING_SLOT_POS:
497 prog->state.gl_pos_idx = var->data.driver_location;
498 varying = false;
499 break;
500 case VARYING_SLOT_PSIZ:
501 prog->state.point_size_idx = var->data.driver_location;
502 varying = false;
503 break;
504 }
505
506 struct lima_varying_info *v = prog->state.varying + var->data.driver_location;
507 if (!v->components) {
508 v->component_size = gpir_glsl_type_size(glsl_get_base_type(var->type));
509 prog->state.num_outputs++;
510 if (varying)
511 prog->state.num_varyings++;
512 }
513
514 v->components += glsl_get_components(var->type);
515 }
516
517 gpir_print_shader_db(nir, comp, debug);
518
519 ralloc_free(comp);
520 return true;
521
522 err_out0:
523 ralloc_free(comp);
524 return false;
525 }
526
527