/*
 * Copyright © 2016-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "broadcom/common/v3d_device_info.h"
#include "v3d_compiler.h"
#include "util/u_prim.h"
#include "compiler/nir/nir_schedule.h"

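/* Returns the number of sources read by the instruction: branches take no
 * register sources, and for ALU instructions we report the source count of
 * whichever op (add or mul) is not a NOP.
 */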
int
vir_get_nsrc(struct qinst *inst)
{
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return 0;
        case V3D_QPU_INSTR_TYPE_ALU:
                if (inst->qpu.alu.add.op != V3D_QPU_A_NOP)
                        return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
                else
                        return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
        }

        return 0;
}

/**
 * Returns whether the instruction has any side effects that must be
 * preserved.
 */
bool
vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
{
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return true;
        case V3D_QPU_INSTR_TYPE_ALU:
                switch (inst->qpu.alu.add.op) {
                case V3D_QPU_A_SETREVF:
                case V3D_QPU_A_SETMSF:
                case V3D_QPU_A_VPMSETUP:
                case V3D_QPU_A_STVPMV:
                case V3D_QPU_A_STVPMD:
                case V3D_QPU_A_STVPMP:
                case V3D_QPU_A_VPMWT:
                case V3D_QPU_A_TMUWT:
                        return true;
                default:
                        break;
                }

                switch (inst->qpu.alu.mul.op) {
                case V3D_QPU_M_MULTOP:
                        return true;
                default:
                        break;
                }
        }

        if (inst->qpu.sig.ldtmu ||
            inst->qpu.sig.ldvary ||
            inst->qpu.sig.ldtlbu ||
            inst->qpu.sig.ldtlb ||
            inst->qpu.sig.wrtmuc ||
            inst->qpu.sig.thrsw) {
                return true;
        }

        return false;
}

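/* Returns whether the instruction is a plain (F)MOV on the mul unit with no
 * pack/unpack modes and no conditional flags, i.e. a copy whose source value
 * reaches the destination unmodified.
 */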
bool
vir_is_raw_mov(struct qinst *inst)
{
        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
             inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
                return false;
        }

        if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
            inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
                return false;
        }

        if (inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
            inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE ||
            inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
            inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE) {
                return false;
        }

        if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
            inst->qpu.flags.mc != V3D_QPU_COND_NONE)
                return false;

        return true;
}

bool
vir_is_add(struct qinst *inst)
{
        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
                inst->qpu.alu.add.op != V3D_QPU_A_NOP);
}

bool
vir_is_mul(struct qinst *inst)
{
        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
                inst->qpu.alu.mul.op != V3D_QPU_M_NOP);
}

bool
vir_is_tex(struct qinst *inst)
{
        if (inst->dst.file == QFILE_MAGIC)
                return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);

        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
            inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) {
                return true;
        }

        return false;
}

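/* Returns whether the instruction implicitly writes the r3 accumulator:
 * reads of the VPM do, and on V3D versions before 4.1 so do the
 * ldvary/ldtlb/ldtlbu/ldvpm signals.
 */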
bool
vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
{
        for (int i = 0; i < vir_get_nsrc(inst); i++) {
                switch (inst->src[i].file) {
                case QFILE_VPM:
                        return true;
                default:
                        break;
                }
        }

        if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
                                  inst->qpu.sig.ldtlb ||
                                  inst->qpu.sig.ldtlbu ||
                                  inst->qpu.sig.ldvpm)) {
                return true;
        }

        return false;
}

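/* Returns whether the instruction implicitly writes the r4 accumulator:
 * SFU writes (RECIP/RSQRT/EXP/LOG/SIN) do, and on V3D versions before 4.1
 * so does the ldtmu signal.
 */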
bool
vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
{
        switch (inst->dst.file) {
        case QFILE_MAGIC:
                switch (inst->dst.index) {
                case V3D_QPU_WADDR_RECIP:
                case V3D_QPU_WADDR_RSQRT:
                case V3D_QPU_WADDR_EXP:
                case V3D_QPU_WADDR_LOG:
                case V3D_QPU_WADDR_SIN:
                        return true;
                }
                break;
        default:
                break;
        }

        if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
                return true;

        return false;
}

void
vir_set_unpack(struct qinst *inst, int src,
               enum v3d_qpu_input_unpack unpack)
{
        assert(src == 0 || src == 1);

        if (vir_is_add(inst)) {
                if (src == 0)
                        inst->qpu.alu.add.a_unpack = unpack;
                else
                        inst->qpu.alu.add.b_unpack = unpack;
        } else {
                assert(vir_is_mul(inst));
                if (src == 0)
                        inst->qpu.alu.mul.a_unpack = unpack;
                else
                        inst->qpu.alu.mul.b_unpack = unpack;
        }
}

void
vir_set_pack(struct qinst *inst, enum v3d_qpu_output_pack pack)
{
        if (vir_is_add(inst)) {
                inst->qpu.alu.add.output_pack = pack;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.alu.mul.output_pack = pack;
        }
}

void
vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.ac = cond;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.mc = cond;
        }
}

void
vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.apf = pf;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.mpf = pf;
        }
}

void
vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.auf = uf;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.muf = uf;
        }
}

#if 0
uint8_t
vir_channels_written(struct qinst *inst)
{
        if (vir_is_mul(inst)) {
                switch (inst->dst.pack) {
                case QPU_PACK_MUL_NOP:
                case QPU_PACK_MUL_8888:
                        return 0xf;
                case QPU_PACK_MUL_8A:
                        return 0x1;
                case QPU_PACK_MUL_8B:
                        return 0x2;
                case QPU_PACK_MUL_8C:
                        return 0x4;
                case QPU_PACK_MUL_8D:
                        return 0x8;
                }
        } else {
                switch (inst->dst.pack) {
                case QPU_PACK_A_NOP:
                case QPU_PACK_A_8888:
                case QPU_PACK_A_8888_SAT:
                case QPU_PACK_A_32_SAT:
                        return 0xf;
                case QPU_PACK_A_8A:
                case QPU_PACK_A_8A_SAT:
                        return 0x1;
                case QPU_PACK_A_8B:
                case QPU_PACK_A_8B_SAT:
                        return 0x2;
                case QPU_PACK_A_8C:
                case QPU_PACK_A_8C_SAT:
                        return 0x4;
                case QPU_PACK_A_8D:
                case QPU_PACK_A_8D_SAT:
                        return 0x8;
                case QPU_PACK_A_16A:
                case QPU_PACK_A_16A_SAT:
                        return 0x3;
                case QPU_PACK_A_16B:
                case QPU_PACK_A_16B_SAT:
                        return 0xc;
                }
        }
        unreachable("Bad pack field");
}
#endif

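/* Allocates a fresh temporary register, growing the defs and spillable
 * tracking arrays when the temp count outgrows them.
 */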
struct qreg
vir_get_temp(struct v3d_compile *c)
{
        struct qreg reg;

        reg.file = QFILE_TEMP;
        reg.index = c->num_temps++;

        if (c->num_temps > c->defs_array_size) {
                uint32_t old_size = c->defs_array_size;
                c->defs_array_size = MAX2(old_size * 2, 16);

                c->defs = reralloc(c, c->defs, struct qinst *,
                                   c->defs_array_size);
                memset(&c->defs[old_size], 0,
                       sizeof(c->defs[0]) * (c->defs_array_size - old_size));

                c->spillable = reralloc(c, c->spillable,
                                        BITSET_WORD,
                                        BITSET_WORDS(c->defs_array_size));
                for (int i = old_size; i < c->defs_array_size; i++)
                        BITSET_SET(c->spillable, i);
        }

        return reg;
}

struct qinst *
vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.alu.add.op = op;

        inst->dst = dst;
        inst->src[0] = src0;
        inst->src[1] = src1;
        inst->uniform = ~0;

        return inst;
}

struct qinst *
vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.alu.mul.op = op;

        inst->dst = dst;
        inst->src[0] = src0;
        inst->src[1] = src1;
        inst->uniform = ~0;

        return inst;
}

struct qinst *
vir_branch_inst(struct v3d_compile *c, enum v3d_qpu_branch_cond cond)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
        inst->qpu.branch.cond = cond;
        inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
        inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
        inst->qpu.branch.ub = true;
        inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;

        inst->dst = vir_nop_reg();
        inst->uniform = vir_get_uniform_index(c, QUNIFORM_CONSTANT, 0);

        return inst;
}

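/* Inserts the instruction at the current cursor position, then moves the
 * cursor past it and invalidates the cached live intervals.
 */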
static void
vir_emit(struct v3d_compile *c, struct qinst *inst)
{
        switch (c->cursor.mode) {
        case vir_cursor_add:
                list_add(&inst->link, c->cursor.link);
                break;
        case vir_cursor_addtail:
                list_addtail(&inst->link, c->cursor.link);
                break;
        }

        c->cursor = vir_after_inst(inst);
        c->live_intervals_valid = false;
}

/* Updates inst to write to a new temporary, emits it, and notes the def. */
struct qreg
vir_emit_def(struct v3d_compile *c, struct qinst *inst)
{
        assert(inst->dst.file == QFILE_NULL);

        /* If we're emitting an instruction that's a def, it had better be
         * writing a register.
         */
        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP ||
                       v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op));
                assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP ||
                       v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op));
        }

        inst->dst = vir_get_temp(c);

        if (inst->dst.file == QFILE_TEMP)
                c->defs[inst->dst.index] = inst;

        vir_emit(c, inst);

        return inst->dst;
}

struct qinst *
vir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
{
        if (inst->dst.file == QFILE_TEMP)
                c->defs[inst->dst.index] = NULL;

        vir_emit(c, inst);

        return inst;
}

struct qblock *
vir_new_block(struct v3d_compile *c)
{
        struct qblock *block = rzalloc(c, struct qblock);

        list_inithead(&block->instructions);

        block->predecessors = _mesa_set_create(block,
                                               _mesa_hash_pointer,
                                               _mesa_key_pointer_equal);

        block->index = c->next_block_index++;

        return block;
}

void
vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
{
        c->cur_block = block;
        c->cursor = vir_after_block(block);
        list_addtail(&block->link, &c->blocks);
}

struct qblock *
vir_entry_block(struct v3d_compile *c)
{
        return list_first_entry(&c->blocks, struct qblock, link);
}

struct qblock *
vir_exit_block(struct v3d_compile *c)
{
        return list_last_entry(&c->blocks, struct qblock, link);
}

void
vir_link_blocks(struct qblock *predecessor, struct qblock *successor)
{
        _mesa_set_add(successor->predecessors, predecessor);
        if (predecessor->successors[0]) {
                assert(!predecessor->successors[1]);
                predecessor->successors[1] = successor;
        } else {
                predecessor->successors[0] = successor;
        }
}

const struct v3d_compiler *
v3d_compiler_init(const struct v3d_device_info *devinfo)
{
        struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
        if (!compiler)
                return NULL;

        compiler->devinfo = devinfo;

        if (!vir_init_reg_sets(compiler)) {
                ralloc_free(compiler);
                return NULL;
        }

        return compiler;
}

void
v3d_compiler_free(const struct v3d_compiler *compiler)
{
        ralloc_free((void *)compiler);
}

static struct v3d_compile *
vir_compile_init(const struct v3d_compiler *compiler,
                 struct v3d_key *key,
                 nir_shader *s,
                 void (*debug_output)(const char *msg,
                                      void *debug_output_data),
                 void *debug_output_data,
                 int program_id, int variant_id,
                 bool fallback_scheduler)
{
        struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);

        c->compiler = compiler;
        c->devinfo = compiler->devinfo;
        c->key = key;
        c->program_id = program_id;
        c->variant_id = variant_id;
        c->threads = 4;
        c->debug_output = debug_output;
        c->debug_output_data = debug_output_data;
        c->compilation_result = V3D_COMPILATION_SUCCEEDED;
        c->fallback_scheduler = fallback_scheduler;

        s = nir_shader_clone(c, s);
        c->s = s;

        list_inithead(&c->blocks);
        vir_set_emit_block(c, vir_new_block(c));

        c->output_position_index = -1;
        c->output_sample_mask_index = -1;

        c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);

        return c;
}

static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
        return glsl_count_attribute_slots(type, false);
}

static void
v3d_lower_nir(struct v3d_compile *c)
{
        struct nir_lower_tex_options tex_options = {
                .lower_txd = true,
                .lower_tg4_broadcom_swizzle = true,

                .lower_rect = false, /* XXX: Use this on V3D 3.x */
                .lower_txp = ~0,
                /* Apply swizzles to all samplers. */
                .swizzle_result = ~0,
        };

        /* Lower the format swizzle and (for 32-bit returns)
         * ARB_texture_swizzle-style swizzle.
         */
        for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) {
                for (int j = 0; j < 4; j++)
                        tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j];

                if (c->key->tex[i].clamp_s)
                        tex_options.saturate_s |= 1 << i;
                if (c->key->tex[i].clamp_t)
                        tex_options.saturate_t |= 1 << i;
                if (c->key->tex[i].clamp_r)
                        tex_options.saturate_r |= 1 << i;
                if (c->key->tex[i].return_size == 16) {
                        tex_options.lower_tex_packing[i] =
                                nir_lower_tex_packing_16;
                }
        }

        /* CS textures may not have return_size reflecting the shadow state. */
        nir_foreach_uniform_variable(var, c->s) {
                const struct glsl_type *type = glsl_without_array(var->type);
                unsigned array_len = MAX2(glsl_get_length(var->type), 1);

                if (!glsl_type_is_sampler(type) ||
                    !glsl_sampler_type_is_shadow(type))
                        continue;

                for (int i = 0; i < array_len; i++) {
                        tex_options.lower_tex_packing[var->data.binding + i] =
                                nir_lower_tex_packing_16;
                }
        }

        NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
        NIR_PASS_V(c->s, nir_lower_system_values);
        NIR_PASS_V(c->s, nir_lower_compute_system_values, NULL);

        NIR_PASS_V(c->s, nir_lower_vars_to_scratch,
                   nir_var_function_temp,
                   0,
                   glsl_get_natural_size_align_bytes);
        NIR_PASS_V(c->s, v3d_nir_lower_scratch);
}

static void
v3d_set_prog_data_uniforms(struct v3d_compile *c,
                           struct v3d_prog_data *prog_data)
{
        int count = c->num_uniforms;
        struct v3d_uniform_list *ulist = &prog_data->uniforms;

        ulist->count = count;
        ulist->data = ralloc_array(prog_data, uint32_t, count);
        memcpy(ulist->data, c->uniform_data,
               count * sizeof(*ulist->data));
        ulist->contents = ralloc_array(prog_data, enum quniform_contents, count);
        memcpy(ulist->contents, c->uniform_contents,
               count * sizeof(*ulist->contents));
}

static void
v3d_vs_set_prog_data(struct v3d_compile *c,
                     struct v3d_vs_prog_data *prog_data)
{
        /* The vertex data gets format converted by the VPM so that
         * each attribute channel takes up a VPM column. Precompute
         * the sizes for the shader record.
         */
        for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
                prog_data->vattr_sizes[i] = c->vattr_sizes[i];
                prog_data->vpm_input_size += c->vattr_sizes[i];
        }

        prog_data->uses_vid = BITSET_TEST(c->s->info.system_values_read,
                                          SYSTEM_VALUE_VERTEX_ID) ||
                              BITSET_TEST(c->s->info.system_values_read,
                                          SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);

        prog_data->uses_biid = BITSET_TEST(c->s->info.system_values_read,
                                           SYSTEM_VALUE_BASE_INSTANCE);

        prog_data->uses_iid = BITSET_TEST(c->s->info.system_values_read,
                                          SYSTEM_VALUE_INSTANCE_ID) ||
                              BITSET_TEST(c->s->info.system_values_read,
                                          SYSTEM_VALUE_INSTANCE_INDEX);

        if (prog_data->uses_vid)
                prog_data->vpm_input_size++;
        if (prog_data->uses_biid)
                prog_data->vpm_input_size++;
        if (prog_data->uses_iid)
                prog_data->vpm_input_size++;

        /* Input/output segment sizes are in sectors (8 rows of 32 bits per
         * channel).
         */
        prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
        prog_data->vpm_output_size = align(c->vpm_output_size, 8) / 8;

        /* Set us up for shared input/output segments. This is apparently
         * necessary for our VCM setup to avoid varying corruption.
         */
        prog_data->separate_segments = false;
        prog_data->vpm_output_size = MAX2(prog_data->vpm_output_size,
                                          prog_data->vpm_input_size);
        prog_data->vpm_input_size = 0;

        /* Compute VCM cache size. We set up our program to take up less than
         * half of the VPM, so that any set of bin and render programs won't
         * run out of space. We need space for at least one input segment,
         * and then allocate the rest to output segments (one for the current
         * program, the rest to VCM). The valid range of the VCM cache size
         * field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4
         * batches.
         */
        assert(c->devinfo->vpm_size);
        int sector_size = V3D_CHANNELS * sizeof(uint32_t) * 8;
        int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size;
        int half_vpm = vpm_size_in_sectors / 2;
        int vpm_output_sectors = half_vpm - prog_data->vpm_input_size;
        int vpm_output_batches = vpm_output_sectors / prog_data->vpm_output_size;
        assert(vpm_output_batches >= 2);
        prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4);
}

static void
v3d_gs_set_prog_data(struct v3d_compile *c,
                     struct v3d_gs_prog_data *prog_data)
{
        prog_data->num_inputs = c->num_inputs;
        memcpy(prog_data->input_slots, c->input_slots,
               c->num_inputs * sizeof(*c->input_slots));

        /* gl_PrimitiveIdIn is written by the GBG into the first word of the
         * VPM output header automatically and the shader will overwrite
         * it after reading it if necessary, so it doesn't add to the VPM
         * size requirements.
         */
        prog_data->uses_pid = BITSET_TEST(c->s->info.system_values_read,
                                          SYSTEM_VALUE_PRIMITIVE_ID);

        /* Output segment size is in sectors (8 rows of 32 bits per channel) */
        prog_data->vpm_output_size = align(c->vpm_output_size, 8) / 8;

        /* Compute SIMD dispatch width and update VPM output size accordingly
         * to ensure we can fit our program in memory. Available widths are
         * 16, 8, 4, 1.
         *
         * Notice that at draw time we will have to consider VPM memory
         * requirements from other stages and choose a smaller dispatch
         * width if needed to fit the program in VPM memory.
         */
        prog_data->simd_width = 16;
        while ((prog_data->simd_width > 1 && prog_data->vpm_output_size > 16) ||
               prog_data->simd_width == 2) {
                prog_data->simd_width >>= 1;
                prog_data->vpm_output_size =
                        align(prog_data->vpm_output_size, 2) / 2;
        }
        assert(prog_data->vpm_output_size <= 16);
        assert(prog_data->simd_width != 2);

        prog_data->out_prim_type = c->s->info.gs.output_primitive;
        prog_data->num_invocations = c->s->info.gs.invocations;
}

static void
v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
                            struct v3d_fs_prog_data *prog_data)
{
        prog_data->num_inputs = c->num_inputs;
        memcpy(prog_data->input_slots, c->input_slots,
               c->num_inputs * sizeof(*c->input_slots));

        STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) >
                      (V3D_MAX_FS_INPUTS - 1) / 24);
        for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) {
                if (BITSET_TEST(c->flat_shade_flags, i))
                        prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24);

                if (BITSET_TEST(c->noperspective_flags, i))
                        prog_data->noperspective_flags[i / 24] |= 1 << (i % 24);

                if (BITSET_TEST(c->centroid_flags, i))
                        prog_data->centroid_flags[i / 24] |= 1 << (i % 24);
        }
}

static void
v3d_fs_set_prog_data(struct v3d_compile *c,
                     struct v3d_fs_prog_data *prog_data)
{
        v3d_set_fs_prog_data_inputs(c, prog_data);
        prog_data->writes_z = c->writes_z;
        prog_data->disable_ez = !c->s->info.fs.early_fragment_tests;
        prog_data->uses_center_w = c->uses_center_w;
        prog_data->uses_implicit_point_line_varyings =
                c->uses_implicit_point_line_varyings;
        prog_data->lock_scoreboard_on_first_thrsw =
                c->lock_scoreboard_on_first_thrsw;
        prog_data->force_per_sample_msaa = c->force_per_sample_msaa;
}

static void
v3d_cs_set_prog_data(struct v3d_compile *c,
                     struct v3d_compute_prog_data *prog_data)
{
        prog_data->shared_size = c->s->info.cs.shared_size;
}

static void
v3d_set_prog_data(struct v3d_compile *c,
                  struct v3d_prog_data *prog_data)
{
        prog_data->threads = c->threads;
        prog_data->single_seg = !c->last_thrsw;
        prog_data->spill_size = c->spill_size;
        prog_data->tmu_dirty_rcl = c->tmu_dirty_rcl;

        v3d_set_prog_data_uniforms(c, prog_data);

        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data);
                break;
        case MESA_SHADER_GEOMETRY:
                v3d_gs_set_prog_data(c, (struct v3d_gs_prog_data *)prog_data);
                break;
        case MESA_SHADER_FRAGMENT:
                v3d_fs_set_prog_data(c, (struct v3d_fs_prog_data *)prog_data);
                break;
        case MESA_SHADER_COMPUTE:
                v3d_cs_set_prog_data(c, (struct v3d_compute_prog_data *)prog_data);
                break;
        default:
                unreachable("unsupported shader stage");
        }
}

static uint64_t *
v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
{
        *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);

        uint64_t *qpu_insts = malloc(*final_assembly_size);
        if (!qpu_insts)
                return NULL;

        memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);

        vir_compile_destroy(c);

        return qpu_insts;
}

static void
v3d_nir_lower_vs_early(struct v3d_compile *c)
{
        /* Split our I/O vars and dead code eliminate the unused
         * components.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar_early,
                   nir_var_shader_in | nir_var_shader_out);
        uint64_t used_outputs[4] = {0};
        for (int i = 0; i < c->vs_key->num_used_outputs; i++) {
                int slot = v3d_slot_get_slot(c->vs_key->used_outputs[i]);
                int comp = v3d_slot_get_component(c->vs_key->used_outputs[i]);
                used_outputs[comp] |= 1ull << slot;
        }
        NIR_PASS_V(c->s, nir_remove_unused_io_vars,
                   nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
        NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);

        /* This must go before nir_lower_io */
        if (c->vs_key->per_vertex_point_size)
                NIR_PASS_V(c->s, nir_lower_point_size, 1.0f, 0.0f);

        NIR_PASS_V(c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
                   type_size_vec4,
                   (nir_lower_io_options)0);
        /* clean up nir_lower_io's deref_var remains and do a constant folding
         * pass on the code it generated.
         */
        NIR_PASS_V(c->s, nir_opt_dce);
        NIR_PASS_V(c->s, nir_opt_constant_folding);
}

static void
v3d_nir_lower_gs_early(struct v3d_compile *c)
{
        /* Split our I/O vars and dead code eliminate the unused
         * components.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar_early,
                   nir_var_shader_in | nir_var_shader_out);
        uint64_t used_outputs[4] = {0};
        for (int i = 0; i < c->gs_key->num_used_outputs; i++) {
                int slot = v3d_slot_get_slot(c->gs_key->used_outputs[i]);
                int comp = v3d_slot_get_component(c->gs_key->used_outputs[i]);
                used_outputs[comp] |= 1ull << slot;
        }
        NIR_PASS_V(c->s, nir_remove_unused_io_vars,
                   nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
        NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);

        /* This must go before nir_lower_io */
        if (c->gs_key->per_vertex_point_size)
                NIR_PASS_V(c->s, nir_lower_point_size, 1.0f, 0.0f);

        NIR_PASS_V(c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
                   type_size_vec4,
                   (nir_lower_io_options)0);
        /* clean up nir_lower_io's deref_var remains */
        NIR_PASS_V(c->s, nir_opt_dce);
}

static void
v3d_fixup_fs_output_types(struct v3d_compile *c)
{
        nir_foreach_shader_out_variable(var, c->s) {
                uint32_t mask = 0;

                switch (var->data.location) {
                case FRAG_RESULT_COLOR:
                        mask = ~0;
                        break;
                case FRAG_RESULT_DATA0:
                case FRAG_RESULT_DATA1:
                case FRAG_RESULT_DATA2:
                case FRAG_RESULT_DATA3:
                        mask = 1 << (var->data.location - FRAG_RESULT_DATA0);
                        break;
                }

                if (c->fs_key->int_color_rb & mask) {
                        var->type =
                                glsl_vector_type(GLSL_TYPE_INT,
                                                 glsl_get_components(var->type));
                } else if (c->fs_key->uint_color_rb & mask) {
                        var->type =
                                glsl_vector_type(GLSL_TYPE_UINT,
                                                 glsl_get_components(var->type));
                }
        }
}

static void
v3d_nir_lower_fs_early(struct v3d_compile *c)
{
        if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb)
                v3d_fixup_fs_output_types(c);

        NIR_PASS_V(c->s, v3d_nir_lower_logic_ops, c);

        if (c->fs_key->line_smoothing) {
                v3d_nir_lower_line_smooth(c->s);
                NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
                /* The lowering pass can introduce new sysval reads */
                nir_shader_gather_info(c->s, nir_shader_get_entrypoint(c->s));
        }

        /* If the shader has no non-TLB side effects, we can promote it to
         * enabling early_fragment_tests even if the user didn't.
         */
        if (!(c->s->info.num_images ||
              c->s->info.num_ssbos)) {
                c->s->info.fs.early_fragment_tests = true;
        }
}

static void
v3d_nir_lower_gs_late(struct v3d_compile *c)
{
        if (c->key->ucp_enables) {
                NIR_PASS_V(c->s, nir_lower_clip_gs, c->key->ucp_enables,
                           false, NULL);
        }

        /* Note: GS output scalarizing must happen after nir_lower_clip_gs. */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);
}

static void
v3d_nir_lower_vs_late(struct v3d_compile *c)
{
        if (c->vs_key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        if (c->key->ucp_enables) {
                NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables,
                           false, false, NULL);
                NIR_PASS_V(c->s, nir_lower_io_to_scalar,
                           nir_var_shader_out);
        }

        /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);
}

static void
v3d_nir_lower_fs_late(struct v3d_compile *c)
{
        if (c->fs_key->light_twoside)
                NIR_PASS_V(c->s, nir_lower_two_sided_color, true);

        if (c->fs_key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        /* In OpenGL the fragment shader can't read gl_ClipDistance[], but
         * Vulkan allows it, in which case the SPIR-V compiler will declare
         * VARYING_SLOT_CLIP_DIST0 as a compact array variable. Pass true as
         * the last parameter to always operate with a compact array in both
         * OpenGL and Vulkan so we don't have to care about which API we are
         * using.
         */
        if (c->key->ucp_enables)
                NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables, true);

        /* Note: FS input scalarizing must happen after
         * nir_lower_two_sided_color, which only handles a vec4 at a time.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);
}

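/* Returns the peak number of temporaries simultaneously live at any
 * instruction, computed from the per-temp live intervals (used for
 * shader-db reporting).
 */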
static uint32_t
vir_get_max_temps(struct v3d_compile *c)
{
        int max_ip = 0;
        vir_for_each_inst_inorder(inst, c)
                max_ip++;

        uint32_t *pressure = rzalloc_array(NULL, uint32_t, max_ip);

        for (int t = 0; t < c->num_temps; t++) {
                for (int i = c->temp_start[t]; (i < c->temp_end[t] &&
                                                i < max_ip); i++) {
                        if (i > max_ip)
                                break;
                        pressure[i]++;
                }
        }

        uint32_t max_temps = 0;
        for (int i = 0; i < max_ip; i++)
                max_temps = MAX2(max_temps, pressure[i]);

        ralloc_free(pressure);

        return max_temps;
}

enum v3d_dependency_class {
        V3D_DEPENDENCY_CLASS_GS_VPM_OUTPUT_0
};

static bool
v3d_intrinsic_dependency_cb(nir_intrinsic_instr *intr,
                            nir_schedule_dependency *dep,
                            void *user_data)
{
        struct v3d_compile *c = user_data;

        switch (intr->intrinsic) {
        case nir_intrinsic_store_output:
                /* Writing to location 0 overwrites the value passed in for
                 * gl_PrimitiveID on geometry shaders.
                 */
                if (c->s->info.stage != MESA_SHADER_GEOMETRY ||
                    nir_intrinsic_base(intr) != 0)
                        break;

                nir_const_value *const_value =
                        nir_src_as_const_value(intr->src[1]);

                if (const_value == NULL)
                        break;

                uint64_t offset =
                        nir_const_value_as_uint(*const_value,
                                                nir_src_bit_size(intr->src[1]));
                if (offset != 0)
                        break;

                dep->klass = V3D_DEPENDENCY_CLASS_GS_VPM_OUTPUT_0;
                dep->type = NIR_SCHEDULE_WRITE_DEPENDENCY;
                return true;

        case nir_intrinsic_load_primitive_id:
                if (c->s->info.stage != MESA_SHADER_GEOMETRY)
                        break;

                dep->klass = V3D_DEPENDENCY_CLASS_GS_VPM_OUTPUT_0;
                dep->type = NIR_SCHEDULE_READ_DEPENDENCY;
                return true;

        default:
                break;
        }

        return false;
}

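/* Runs the NIR lowering and optimization pipeline for the shader stage and
 * then translates the result to VIR. On register allocation failure the
 * caller retries with the fallback scheduler.
 */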
static void
v3d_attempt_compile(struct v3d_compile *c)
{
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                c->vs_key = (struct v3d_vs_key *) c->key;
                break;
        case MESA_SHADER_GEOMETRY:
                c->gs_key = (struct v3d_gs_key *) c->key;
                break;
        case MESA_SHADER_FRAGMENT:
                c->fs_key = (struct v3d_fs_key *) c->key;
                break;
        case MESA_SHADER_COMPUTE:
                break;
        default:
                unreachable("unsupported shader stage");
        }

        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                v3d_nir_lower_vs_early(c);
                break;
        case MESA_SHADER_GEOMETRY:
                v3d_nir_lower_gs_early(c);
                break;
        case MESA_SHADER_FRAGMENT:
                v3d_nir_lower_fs_early(c);
                break;
        default:
                break;
        }

        v3d_lower_nir(c);

        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                v3d_nir_lower_vs_late(c);
                break;
        case MESA_SHADER_GEOMETRY:
                v3d_nir_lower_gs_late(c);
                break;
        case MESA_SHADER_FRAGMENT:
                v3d_nir_lower_fs_late(c);
                break;
        default:
                break;
        }

        NIR_PASS_V(c->s, v3d_nir_lower_io, c);
        NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
        NIR_PASS_V(c->s, v3d_nir_lower_image_load_store);
        NIR_PASS_V(c->s, nir_lower_idiv, nir_lower_idiv_fast);

        if (c->key->robust_buffer_access) {
                /* v3d_nir_lower_robust_buffer_access assumes constant buffer
                 * indices on ubo/ssbo intrinsics, so run a copy propagation
                 * pass before the lowering to ensure this. We also want to
                 * run the lowering before v3d_optimize to clean up redundant
                 * get_buffer_size calls produced in the pass.
                 */
                NIR_PASS_V(c->s, nir_copy_prop);
                NIR_PASS_V(c->s, v3d_nir_lower_robust_buffer_access, c);
        }

        v3d_optimize_nir(c->s);

        /* Do late algebraic optimization to turn add(a, neg(b)) back into
         * subs, then the mandatory cleanup after algebraic. Note that it may
         * produce fnegs, and if so then we need to keep running to squash
         * fneg(fneg(a)).
         */
        bool more_late_algebraic = true;
        while (more_late_algebraic) {
                more_late_algebraic = false;
                NIR_PASS(more_late_algebraic, c->s, nir_opt_algebraic_late);
                NIR_PASS_V(c->s, nir_opt_constant_folding);
                NIR_PASS_V(c->s, nir_copy_prop);
                NIR_PASS_V(c->s, nir_opt_dce);
                NIR_PASS_V(c->s, nir_opt_cse);
        }

        NIR_PASS_V(c->s, nir_lower_bool_to_int32);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        struct nir_schedule_options schedule_options = {
                /* Schedule for about half our register space, to enable more
                 * shaders to hit 4 threads.
                 */
                .threshold = 24,

                /* Vertex shaders share the same memory for inputs and
                 * outputs, fragment and geometry shaders do not.
                 */
                .stages_with_shared_io_memory =
                        (((1 << MESA_ALL_SHADER_STAGES) - 1) &
                         ~((1 << MESA_SHADER_FRAGMENT) |
                           (1 << MESA_SHADER_GEOMETRY))),

                .fallback = c->fallback_scheduler,

                .intrinsic_cb = v3d_intrinsic_dependency_cb,
                .intrinsic_cb_data = c,
        };
        NIR_PASS_V(c->s, nir_schedule, &schedule_options);

        v3d_nir_to_vir(c);
}

uint32_t
v3d_prog_data_size(gl_shader_stage stage)
{
        static const int prog_data_size[] = {
                [MESA_SHADER_VERTEX] = sizeof(struct v3d_vs_prog_data),
                [MESA_SHADER_GEOMETRY] = sizeof(struct v3d_gs_prog_data),
                [MESA_SHADER_FRAGMENT] = sizeof(struct v3d_fs_prog_data),
                [MESA_SHADER_COMPUTE] = sizeof(struct v3d_compute_prog_data),
        };

        assert(stage >= 0 &&
               stage < ARRAY_SIZE(prog_data_size) &&
               prog_data_size[stage]);

        return prog_data_size[stage];
}

int v3d_shaderdb_dump(struct v3d_compile *c,
                      char **shaderdb_str)
{
        if (c == NULL)
                return -1;

        return asprintf(shaderdb_str,
                        "%s shader: %d inst, %d threads, %d loops, "
                        "%d uniforms, %d max-temps, %d:%d spills:fills, "
                        "%d sfu-stalls, %d inst-and-stalls",
                        vir_get_stage_name(c),
                        c->qpu_inst_count,
                        c->threads,
                        c->loops,
                        c->num_uniforms,
                        vir_get_max_temps(c),
                        c->spills,
                        c->fills,
                        c->qpu_inst_stalled_count,
                        c->qpu_inst_count + c->qpu_inst_stalled_count);
}

uint64_t *v3d_compile(const struct v3d_compiler *compiler,
                      struct v3d_key *key,
                      struct v3d_prog_data **out_prog_data,
                      nir_shader *s,
                      void (*debug_output)(const char *msg,
                                           void *debug_output_data),
                      void *debug_output_data,
                      int program_id, int variant_id,
                      uint32_t *final_assembly_size)
{
        struct v3d_compile *c;

        for (int i = 0; true; i++) {
                c = vir_compile_init(compiler, key, s,
                                     debug_output, debug_output_data,
                                     program_id, variant_id,
                                     i > 0 /* fallback_scheduler */);

                v3d_attempt_compile(c);

                if (i > 0 ||
                    c->compilation_result !=
                    V3D_COMPILATION_FAILED_REGISTER_ALLOCATION)
                        break;

                char *debug_msg;
                int ret = asprintf(&debug_msg,
                                   "Using fallback scheduler for %s",
                                   vir_get_stage_name(c));

                if (ret >= 0) {
                        if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF))
                                fprintf(stderr, "%s\n", debug_msg);

                        c->debug_output(debug_msg, c->debug_output_data);
                        free(debug_msg);
                }

                vir_compile_destroy(c);
        }

        struct v3d_prog_data *prog_data;

        prog_data = rzalloc_size(NULL, v3d_prog_data_size(c->s->info.stage));

        v3d_set_prog_data(c, prog_data);

        *out_prog_data = prog_data;

        char *shaderdb;
        int ret = v3d_shaderdb_dump(c, &shaderdb);
        if (ret >= 0) {
                if (V3D_DEBUG & V3D_DEBUG_SHADERDB)
                        fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

                c->debug_output(shaderdb, c->debug_output_data);
                free(shaderdb);
        }

        return v3d_return_qpu_insts(c, final_assembly_size);
}

void
vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
{
        if (qinst->dst.file == QFILE_TEMP)
                c->defs[qinst->dst.index] = NULL;

        assert(&qinst->link != c->cursor.link);

        list_del(&qinst->link);
        free(qinst);

        c->live_intervals_valid = false;
}

struct qreg
vir_follow_movs(struct v3d_compile *c, struct qreg reg)
{
        /* XXX
        int pack = reg.pack;

        while (reg.file == QFILE_TEMP &&
               c->defs[reg.index] &&
               (c->defs[reg.index]->op == QOP_MOV ||
                c->defs[reg.index]->op == QOP_FMOV) &&
               !c->defs[reg.index]->dst.pack &&
               !c->defs[reg.index]->src[0].pack) {
                reg = c->defs[reg.index]->src[0];
        }

        reg.pack = pack;
        */
        return reg;
}

void
vir_compile_destroy(struct v3d_compile *c)
{
        /* Defuse the assert that we aren't removing the cursor's
         * instruction.
         */
        c->cursor.link = NULL;

        vir_for_each_block(block, c) {
                while (!list_is_empty(&block->instructions)) {
                        struct qinst *qinst =
                                list_first_entry(&block->instructions,
                                                 struct qinst, link);
                        vir_remove_instruction(c, qinst);
                }
        }

        ralloc_free(c);
}

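/* Returns the index of the given (contents, data) pair in the shader's
 * uniform stream, reusing an existing entry when possible and appending a
 * new one (growing the arrays as needed) otherwise.
 */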
uint32_t
vir_get_uniform_index(struct v3d_compile *c,
                      enum quniform_contents contents,
                      uint32_t data)
{
        for (int i = 0; i < c->num_uniforms; i++) {
                if (c->uniform_contents[i] == contents &&
                    c->uniform_data[i] == data) {
                        return i;
                }
        }

        uint32_t uniform = c->num_uniforms++;

        if (uniform >= c->uniform_array_size) {
                c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
                                             c->uniform_array_size * 2);

                c->uniform_data = reralloc(c, c->uniform_data,
                                           uint32_t,
                                           c->uniform_array_size);
                c->uniform_contents = reralloc(c, c->uniform_contents,
                                               enum quniform_contents,
                                               c->uniform_array_size);
        }

        c->uniform_contents[uniform] = contents;
        c->uniform_data[uniform] = data;

        return uniform;
}

struct qreg
vir_uniform(struct v3d_compile *c,
            enum quniform_contents contents,
            uint32_t data)
{
        struct qinst *inst = vir_NOP(c);
        inst->qpu.sig.ldunif = true;
        inst->uniform = vir_get_uniform_index(c, contents, data);
        inst->dst = vir_get_temp(c);
        c->defs[inst->dst.index] = inst;
        return inst->dst;
}

#define OPTPASS(func)                                                   \
        do {                                                            \
                bool stage_progress = func(c);                          \
                if (stage_progress) {                                   \
                        progress = true;                                \
                        if (print_opt_debug) {                          \
                                fprintf(stderr,                         \
                                        "VIR opt pass %2d: %s progress\n", \
                                        pass, #func);                   \
                        }                                               \
                        /*XXX vir_validate(c);*/                        \
                }                                                       \
        } while (0)

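/* Runs the VIR-level optimization passes repeatedly until none of them
 * makes further progress.
 */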
void
vir_optimize(struct v3d_compile *c)
{
        bool print_opt_debug = false;
        int pass = 1;

        while (true) {
                bool progress = false;

                OPTPASS(vir_opt_copy_propagate);
                OPTPASS(vir_opt_redundant_flags);
                OPTPASS(vir_opt_dead_code);
                OPTPASS(vir_opt_small_immediates);

                if (!progress)
                        break;

                pass++;
        }
}

const char *
vir_get_stage_name(struct v3d_compile *c)
{
        if (c->vs_key && c->vs_key->is_coord)
                return "MESA_SHADER_VERTEX_BIN";
        else if (c->gs_key && c->gs_key->is_coord)
                return "MESA_SHADER_GEOMETRY_BIN";
        else
                return gl_shader_stage_name(c->s->info.stage);
}