1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #define BCP_DEBUG 0
28
29 #if BCP_DEBUG
30 #define BCP_DUMP(q) do { q } while (0)
31 #else
32 #define BCP_DUMP(q)
33 #endif
34
35 #include "r600_pipe.h"
36 #include "r600_shader.h"
37 #include "eg_sq.h" // CM_V_SQ_MOVA_DST_CF_IDX0/1
38
39 #include <stack>
40
41 #include "sb_bc.h"
42 #include "sb_shader.h"
43 #include "sb_pass.h"
44 #include "util/macros.h"
45
46 namespace r600_sb {
47
decode()48 int bc_parser::decode() {
49
50 dw = bc->bytecode;
51 bc_ndw = bc->ndw;
52 max_cf = 0;
53
54 dec = new bc_decoder(ctx, dw, bc_ndw);
55
56 shader_target t = TARGET_UNKNOWN;
57
58 if (pshader) {
59 switch (bc->type) {
60 case PIPE_SHADER_FRAGMENT: t = TARGET_PS; break;
61 case PIPE_SHADER_VERTEX:
62 t = pshader->vs_as_ls ? TARGET_LS : (pshader->vs_as_es ? TARGET_ES : TARGET_VS);
63 break;
64 case PIPE_SHADER_GEOMETRY: t = TARGET_GS; break;
65 case PIPE_SHADER_COMPUTE: t = TARGET_COMPUTE; break;
66 case PIPE_SHADER_TESS_CTRL: t = TARGET_HS; break;
67 case PIPE_SHADER_TESS_EVAL: t = pshader->tes_as_es ? TARGET_ES : TARGET_VS; break;
68 default: assert(!"unknown shader target"); return -1; break;
69 }
70 } else {
71 if (bc->type == PIPE_SHADER_COMPUTE)
72 t = TARGET_COMPUTE;
73 else
74 t = TARGET_FETCH;
75 }
76
77 sh = new shader(ctx, t, bc->debug_id);
78 sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE || bc->precise);
79
80 int r = decode_shader();
81
82 delete dec;
83
84 sh->ngpr = bc->ngpr;
85 sh->nstack = bc->nstack;
86
87 return r;
88 }
89
decode_shader()90 int bc_parser::decode_shader() {
91 int r = 0;
92 unsigned i = 0;
93 bool eop = false;
94
95 sh->init();
96
97 do {
98 eop = false;
99 if ((r = decode_cf(i, eop)))
100 return r;
101
102 } while (!eop || (i >> 1) < max_cf);
103
104 return 0;
105 }
106
prepare()107 int bc_parser::prepare() {
108 int r = 0;
109 if ((r = parse_decls()))
110 return r;
111 if ((r = prepare_ir()))
112 return r;
113 return 0;
114 }
115
parse_decls()116 int bc_parser::parse_decls() {
117
118 if (!pshader) {
119 if (gpr_reladdr)
120 sh->add_gpr_array(0, bc->ngpr, 0x0F);
121
122 // compute shaders have some values preloaded in R0, R1
123 sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */);
124 sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */);
125 return 0;
126 }
127
128 if (pshader->indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER))) {
129
130 assert(pshader->num_arrays);
131
132 if (pshader->num_arrays) {
133 for (unsigned i = 0; i < pshader->num_arrays; ++i) {
134 r600_shader_array &a = pshader->arrays[i];
135 sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
136 }
137 } else {
138 sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
139 }
140 }
141
142 // GS inputs can add indirect addressing
143 if (sh->target == TARGET_GS) {
144 if (pshader->num_arrays) {
145 for (unsigned i = 0; i < pshader->num_arrays; ++i) {
146 r600_shader_array &a = pshader->arrays[i];
147 sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
148 }
149 }
150 }
151
152 if (sh->target == TARGET_VS || sh->target == TARGET_ES || sh->target == TARGET_HS || sh->target == TARGET_LS)
153 sh->add_input(0, 1, 0x0F);
154 else if (sh->target == TARGET_GS) {
155 sh->add_input(0, 1, 0x0F);
156 sh->add_input(1, 1, 0x0F);
157 } else if (sh->target == TARGET_COMPUTE) {
158 sh->add_input(0, 1, 0x0F);
159 sh->add_input(1, 1, 0x0F);
160 }
161
162 bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
163 && sh->target == TARGET_PS;
164
165 bool ij_interpolators[6];
166 memset(ij_interpolators, 0, sizeof(ij_interpolators));
167
168 for (unsigned i = 0; i < pshader->ninput; ++i) {
169 r600_shader_io & in = pshader->input[i];
170 bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
171 sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F);
172 if (ps_interp && in.spi_sid) {
173 int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location);
174 if (k >= 0) {
175 ij_interpolators[k] |= true;
176 if (in.uses_interpolate_at_centroid) {
177 k = eg_get_interpolator_index(in.interpolate, TGSI_INTERPOLATE_LOC_CENTROID);
178 ij_interpolators[k] |= true;
179 }
180 }
181 }
182 }
183
184 if (ps_interp) {
185 /* add the egcm ij interpolators to live inputs */
186 unsigned num_ij = 0;
187 for (unsigned i = 0; i < ARRAY_SIZE(ij_interpolators); i++) {
188 num_ij += ij_interpolators[i];
189 }
190
191 unsigned mask = (1 << (2 * num_ij)) - 1;
192 unsigned gpr = 0;
193
194 while (mask) {
195 sh->add_input(gpr, true, mask & 0x0F);
196 ++gpr;
197 mask >>= 4;
198 }
199 }
200
201 return 0;
202 }
203
decode_cf(unsigned & i,bool & eop)204 int bc_parser::decode_cf(unsigned &i, bool &eop) {
205
206 int r;
207
208 cf_node *cf = sh->create_cf();
209 sh->root->push_back(cf);
210
211 unsigned id = i >> 1;
212
213 cf->bc.id = id;
214
215 if (cf_map.size() < id + 1)
216 cf_map.resize(id + 1);
217
218 cf_map[id] = cf;
219
220 if ((r = dec->decode_cf(i, cf->bc)))
221 return r;
222
223 cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;
224
225 if (flags & CF_ALU) {
226 if ((r = decode_alu_clause(cf)))
227 return r;
228 } else if (flags & CF_FETCH) {
229 if ((r = decode_fetch_clause(cf)))
230 return r;
231 } else if (flags & CF_EXP) {
232 if (cf->bc.rw_rel)
233 gpr_reladdr = true;
234 assert(!cf->bc.rw_rel);
235 } else if (flags & CF_MEM) {
236 if (cf->bc.rw_rel)
237 gpr_reladdr = true;
238 assert(!cf->bc.rw_rel);
239 } else if (flags & CF_BRANCH) {
240 if (cf->bc.addr > max_cf)
241 max_cf = cf->bc.addr;
242 }
243
244 eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END ||
245 cf->bc.op == CF_OP_RET;
246 return 0;
247 }
248
decode_alu_clause(cf_node * cf)249 int bc_parser::decode_alu_clause(cf_node* cf) {
250 unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;
251
252 cf->subtype = NST_ALU_CLAUSE;
253
254 cgroup = 0;
255 memset(slots[0], 0, 5*sizeof(slots[0][0]));
256
257 unsigned ng = 0;
258
259 do {
260 decode_alu_group(cf, i, gcnt);
261 assert(gcnt <= cnt);
262 cnt -= gcnt;
263 ng++;
264 } while (cnt);
265
266 return 0;
267 }
268
// Decode one ALU instruction group (up to 5 slots plus literal dwords).
// 'i' is the current dword offset and is advanced past any literals;
// 'gcnt' returns the number of 64-bit slots consumed by the group.
int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
	int r;
	alu_node *n;
	alu_group_node *g = sh->create_alu_group();

	// flip the double-buffered slot table: slots[cgroup] is the current
	// group, the other half stays valid for the previous group
	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
	gcnt = 0;

	unsigned literal_mask = 0;

	// decode instructions until the 'last' bit terminates the group
	do {
		n = sh->create_alu();
		g->push_back(n);

		if ((r = dec->decode_alu(i, n->bc)))
			return r;

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		gcnt++;

	} while (gcnt <= 5 && !n->bc.last);

	assert(n->bc.last);

	// second pass: note relative addressing and patch literal operands,
	// which live in the dwords immediately following the group
	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		n = static_cast<alu_node*>(*I);

		if (n->bc.dst_rel)
			gpr_reladdr = true;

		for (int k = 0; k < n->bc.op_ptr->src_count; ++k) {
			bc_alu_src &src = n->bc.src[k];
			if (src.rel)
				gpr_reladdr = true;
			if (src.sel == ALU_SRC_LITERAL) {
				// src.chan selects which trailing literal dword is used
				literal_mask |= (1 << src.chan);
				src.value.u = dw[i + src.chan];
			}
		}
	}

	// collect the literal dwords actually referenced by the group
	unsigned literal_ndw = 0;
	while (literal_mask) {
		g->literals.push_back(dw[i + literal_ndw]);
		literal_ndw += 1;
		literal_mask >>= 1;
	}

	// literals are stored in pairs (one 64-bit slot per two dwords)
	literal_ndw = (literal_ndw + 1) & ~1u;

	i += literal_ndw;
	gcnt += literal_ndw >> 1;

	cf->push_back(g);
	return 0;
}
330
prepare_alu_clause(cf_node * cf)331 int bc_parser::prepare_alu_clause(cf_node* cf) {
332
333 // loop over alu groups
334 for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
335 assert(I->subtype == NST_ALU_GROUP);
336 alu_group_node *g = static_cast<alu_group_node*>(*I);
337 prepare_alu_group(cf, g);
338 }
339
340 return 0;
341 }
342
save_set_cf_index(value * val,unsigned idx)343 void bc_parser::save_set_cf_index(value *val, unsigned idx)
344 {
345 assert(idx <= 1);
346 assert(val);
347 cf_index_value[idx] = val;
348 }
get_cf_index_value(unsigned idx)349 value *bc_parser::get_cf_index_value(unsigned idx)
350 {
351 assert(idx <= 1);
352 assert(cf_index_value[idx]);
353 return cf_index_value[idx];
354 }
// Remember the most recent MOVA instruction so a following
// SET_CF_IDX0/1 can pick up its source operand.
void bc_parser::save_mova(alu_node *mova)
{
	assert(mova);
	this->mova = mova;
}
// Return the last MOVA saved by save_mova(); must have been set.
alu_node *bc_parser::get_mova()
{
	assert(mova);
	return mova;
}
365
// Build SSA-like IR operands (src/dst value lists, special values,
// scheduling flags) for each instruction of a decoded ALU group, then
// pack multi-slot instructions into alu_packed_nodes.
int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {

	alu_node *n;

	// flip the double-buffered slot table: slots[cgroup] describes the
	// current group, the other half still holds the previous group so
	// PV/PS sources can be resolved
	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));

	for (node_iterator I = g->begin(), E = g->end();
			I != E; ++I) {
		n = static_cast<alu_node*>(*I);
		// per-instruction flags: does it use UBO indexing via CF_IDX0/1
		bool ubo_indexing[2] = {};

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		unsigned src_count = n->bc.op_ptr->src_count;

		if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
			n->flags |= NF_ALU_4SLOT;

		if (ctx.alu_slots(n->bc.op) & AF_2SLOT)
			n->flags |= NF_ALU_2SLOT;

		n->src.resize(src_count);

		unsigned flags = n->bc.op_ptr->flags;

		if (flags & AF_LDS) {
			// LDS ops write through special values (LDS RW / output
			// queues A and B) instead of a plain GPR destination
			bool need_rw = false, need_oqa = false, need_oqb = false;
			int ndst = 0, ncount = 0;

			/* all non-read operations have side effects */
			if (n->bc.op != LDS_OP2_LDS_READ2_RET &&
			    n->bc.op != LDS_OP1_LDS_READ_REL_RET &&
			    n->bc.op != LDS_OP1_LDS_READ_RET) {
				n->flags |= NF_DONT_KILL;
				ndst++;
				need_rw = true;
			}

			// ops in this range return a value through OQA
			if (n->bc.op >= LDS_OP2_LDS_ADD_RET && n->bc.op <= LDS_OP1_LDS_USHORT_READ_RET) {
				need_oqa = true;
				ndst++;
			}

			// two-result reads also use OQB
			if (n->bc.op == LDS_OP2_LDS_READ2_RET || n->bc.op == LDS_OP1_LDS_READ_REL_RET) {
				need_oqb = true;
				ndst++;
			}

			n->dst.resize(ndst);
			if (need_oqa)
				n->dst[ncount++] = sh->get_special_value(SV_LDS_OQA);
			if (need_oqb)
				n->dst[ncount++] = sh->get_special_value(SV_LDS_OQB);
			if (need_rw)
				n->dst[ncount++] = sh->get_special_value(SV_LDS_RW);

			n->flags |= NF_DONT_MOVE | NF_DONT_HOIST;

		} else if (flags & AF_PRED) {
			// predicate-setting ops may update the predicate and/or the
			// exec mask in addition to their normal destination (dst[0])
			n->dst.resize(3);
			if (n->bc.update_pred)
				n->dst[1] = sh->get_special_value(SV_ALU_PRED);
			if (n->bc.update_exec_mask)
				n->dst[2] = sh->get_special_value(SV_EXEC_MASK);

			n->flags |= NF_DONT_HOIST;

		} else if (flags & AF_KILL) {

			// KILL updates the valid mask as a side effect
			n->dst.resize(2);
			n->dst[1] = sh->get_special_value(SV_VALID_MASK);
			sh->set_uses_kill();

			n->flags |= NF_DONT_HOIST | NF_DONT_MOVE |
					NF_DONT_KILL | NF_SCHEDULE_EARLY;

		} else {
			n->dst.resize(1);
		}

		if (n->bc.op == ALU_OP0_SET_CF_IDX0 || n->bc.op == ALU_OP0_SET_CF_IDX1) {
			// Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX
			// DCE will kill this op
			save_set_cf_index(get_mova()->src[0], n->bc.op == ALU_OP0_SET_CF_IDX1);
		} else if (flags & AF_MOVA) {

			n->dst[0] = sh->get_special_value(SV_AR_INDEX);
			save_mova(n);

			n->flags |= NF_DONT_HOIST;

		} else if ((n->bc.op_ptr->src_count == 3 || n->bc.write_mask) && !(flags & AF_LDS)) {
			// normal GPR destination; relative dst only valid with AR
			assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X);

			value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan,
					n->bc.dst_rel);

			n->dst[0] = v;
		}

		if (n->bc.pred_sel) {
			sh->has_alu_predication = true;
			n->pred = sh->get_special_value(SV_ALU_PRED);
		}

		// resolve every source operand to an IR value
		for (unsigned s = 0; s < src_count; ++s) {
			bc_alu_src &src = n->bc.src[s];

			if (src.sel == ALU_SRC_LITERAL) {
				n->src[s] = sh->get_const_value(src.value);
			} else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
				// PV/PS reference the result of the previous group:
				// PS is the trans slot, PV selects by channel
				unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
						((unsigned)SLOT_TRANS) : src.chan;

				// XXX shouldn't happen but llvm backend uses PS on cayman
				if (prev_slot == SLOT_TRANS && ctx.is_cayman())
					prev_slot = SLOT_X;

				alu_node *prev_alu = slots[pgroup][prev_slot];

				assert(prev_alu);

				// make sure the previous instruction has an IR dst we
				// can reference, even if it didn't write a GPR
				if (!prev_alu->dst[0]) {
					value * t = sh->create_temp_value();
					prev_alu->dst[0] = t;
				}

				value *d = prev_alu->dst[0];

				if (d->is_rel()) {
					d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr,
					                      prev_alu->bc.dst_chan,
					                      prev_alu->bc.dst_rel);
				}

				n->src[s] = d;
			} else if (ctx.is_kcache_sel(src.sel)) {
				// constant-cache source: decode bank set and address
				unsigned sel = src.sel, kc_addr;
				unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1);

				bc_kcache &kc = cf->bc.kc[kc_set];
				kc_addr = (kc.addr << 4) + (sel & 0x1F);
				n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan, (alu_kcache_index_mode)kc.index_mode);

				if (kc.index_mode != KC_INDEX_NONE) {
					assert(kc.index_mode != KC_LOCK_LOOP);
					ubo_indexing[kc.index_mode - KC_INDEX_0] = true;
				}
			} else if (src.sel < MAX_GPR) {
				value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel);

				n->src[s] = v;

			} else if (src.sel >= ALU_SRC_PARAM_OFFSET) {
				// using slot for value channel because in fact the slot
				// determines the channel that is loaded by INTERP_LOAD_P0
				// (and maybe some others).
				// otherwise GVN will consider INTERP_LOAD_P0s with the same
				// param index as equal instructions and leave only one of them
				n->src[s] = sh->get_special_ro_value(sel_chan(src.sel,
				                                              n->bc.slot));
			} else if (ctx.is_lds_oq(src.sel)) {
				// LDS output-queue sources; only the popping variants
				// are supported here
				switch (src.sel) {
				case ALU_SRC_LDS_OQ_A:
				case ALU_SRC_LDS_OQ_B:
					assert(!"Unsupported LDS queue access in SB");
					break;
				case ALU_SRC_LDS_OQ_A_POP:
					n->src[s] = sh->get_special_value(SV_LDS_OQA);
					break;
				case ALU_SRC_LDS_OQ_B_POP:
					n->src[s] = sh->get_special_value(SV_LDS_OQB);
					break;
				}
				n->flags |= NF_DONT_HOIST | NF_DONT_MOVE;

			} else {
				// inline constants and other special read-only selects
				switch (src.sel) {
				case ALU_SRC_0:
					n->src[s] = sh->get_const_value(0);
					break;
				case ALU_SRC_0_5:
					n->src[s] = sh->get_const_value(0.5f);
					break;
				case ALU_SRC_1:
					n->src[s] = sh->get_const_value(1.0f);
					break;
				case ALU_SRC_1_INT:
					n->src[s] = sh->get_const_value(1);
					break;
				case ALU_SRC_M_1_INT:
					n->src[s] = sh->get_const_value(-1);
					break;
				default:
					n->src[s] = sh->get_special_ro_value(src.sel);
					break;
				}
			}
		}

		// add UBO index values if any as dependencies
		if (ubo_indexing[0]) {
			n->src.push_back(get_cf_index_value(0));
		}
		if (ubo_indexing[1]) {
			n->src.push_back(get_cf_index_value(1));
		}

		// Cayman encodes SET_CF_IDX as a MOVA with a special dst_gpr
		if ((flags & AF_MOVA) && (n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) &&
		    ctx.is_cayman())
			// Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX
			save_set_cf_index(n->src[0], n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1);
	}

	// pack multislot instructions into alu_packed_node

	alu_packed_node *p = NULL;
	for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) {
		N = I + 1;
		alu_node *a = static_cast<alu_node*>(*I);
		unsigned sflags = a->bc.slot_flags;

		if (sflags == AF_4V || sflags == AF_2V || (ctx.is_cayman() && sflags == AF_S)) {
			if (!p)
				p = sh->create_alu_packed();

			a->remove();
			p->push_back(a);
			// 2-slot packs close as soon as both halves are collected
			if (sflags == AF_2V && p->count() == 2) {
				g->push_front(p);
				p = NULL;
			}
		}
	}

	if (p) {
		g->push_front(p);

		if (p->count() == 3 && ctx.is_cayman()) {
			// cayman's scalar instruction that can use 3 or 4 slots

			// FIXME for simplicity we'll always add 4th slot,
			// but probably we might want to always remove 4th slot and make
			// sure that regalloc won't choose 'w' component for dst

			alu_node *f = static_cast<alu_node*>(p->first);
			alu_node *a = sh->create_alu();
			a->src = f->src;
			a->dst.resize(f->dst.size());
			a->bc = f->bc;
			a->bc.slot = SLOT_W;
			p->push_back(a);
		}
	}

	return 0;
}
627
decode_fetch_clause(cf_node * cf)628 int bc_parser::decode_fetch_clause(cf_node* cf) {
629 int r;
630 unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;
631
632 if (cf->bc.op_ptr->flags & FF_GDS)
633 cf->subtype = NST_GDS_CLAUSE;
634 else
635 cf->subtype = NST_TEX_CLAUSE;
636
637 while (cnt--) {
638 fetch_node *n = sh->create_fetch();
639 cf->push_back(n);
640 if ((r = dec->decode_fetch(i, n->bc)))
641 return r;
642 if (n->bc.src_rel || n->bc.dst_rel)
643 gpr_reladdr = true;
644
645 }
646 return 0;
647 }
648
// Build IR operands for every fetch instruction of a decoded clause.
// SET_GRADIENTS_* / SET_TEXTURE_OFFSETS sources are captured into local
// vectors and folded into the instructions that consume them.
int bc_parser::prepare_fetch_clause(cf_node *cf) {

	vvec grad_v, grad_h, texture_offsets;

	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {

		fetch_node *n = static_cast<fetch_node*>(*I);
		assert(n->is_valid());

		unsigned flags = n->bc.op_ptr->flags;

		unsigned vtx = flags & FF_VTX;
		unsigned gds = flags & FF_GDS;
		// source operand count depends on the fetch kind
		unsigned num_src = gds ? 2 : vtx ? ctx.vtx_src_num : 4;

		n->dst.resize(4);

		if (gds) {
			// GDS ops have side effects and must not be reordered
			n->flags |= NF_DONT_HOIST | NF_DONT_MOVE | NF_DONT_KILL;
		}
		if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) {
			sh->uses_gradients = true;
		}

		if (flags & (FF_SETGRAD | FF_SET_TEXTURE_OFFSETS)) {

			// capture this instruction's sources into the matching
			// holding vector instead of emitting it directly
			vvec *grad = NULL;

			switch (n->bc.op) {
			case FETCH_OP_SET_GRADIENTS_V:
				grad = &grad_v;
				break;
			case FETCH_OP_SET_GRADIENTS_H:
				grad = &grad_h;
				break;
			case FETCH_OP_SET_TEXTURE_OFFSETS:
				grad = &texture_offsets;
				break;
			default:
				assert(!"unexpected SET_GRAD instruction");
				return -1;
			}

			if (grad->empty())
				grad->resize(4);

			for(unsigned s = 0; s < 4; ++s) {
				unsigned sw = n->bc.src_sel[s];
				if (sw <= SEL_W)
					(*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr,
					                               sw, false);
				else if (sw == SEL_0)
					(*grad)[s] = sh->get_const_value(0.0f);
				else if (sw == SEL_1)
					(*grad)[s] = sh->get_const_value(1.0f);
			}
		} else {
			// Fold source values for instructions with hidden target values in to the instructions
			// using them. The set instructions are later re-emitted by bc_finalizer
			if (flags & FF_USEGRAD) {
				// sources 4..7 = vertical grads, 8..11 = horizontal
				n->src.resize(12);
				std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4);
				std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8);
			} else if (flags & FF_USE_TEXTURE_OFFSETS) {
				n->src.resize(8);
				std::copy(texture_offsets.begin(), texture_offsets.end(), n->src.begin() + 4);
			} else {
				n->src.resize(4);
			}

			for(int s = 0; s < 4; ++s) {
				if (n->bc.dst_sel[s] != SEL_MASK)
					n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s, false);
				// NOTE: it doesn't matter here which components of the result we
				// are using, but original n->bc.dst_sel should be taken into
				// account when building the bytecode
			}
			for(unsigned s = 0; s < num_src; ++s) {
				if (n->bc.src_sel[s] <= SEL_W)
					n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr,
					                              n->bc.src_sel[s], false);
			}

			// Scheduler will emit the appropriate instructions to set CF_IDX0/1
			if (n->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) {
				n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode == V_SQ_CF_INDEX_1));
			}
			if (n->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
				n->src.push_back(get_cf_index_value(n->bc.resource_index_mode == V_SQ_CF_INDEX_1));
			}
		}

		if (n->bc.op == FETCH_OP_READ_SCRATCH) {
			// scratch reads are ordered through the SV_SCRATCH token
			n->src.push_back(sh->get_special_value(SV_SCRATCH));
			n->dst.push_back(sh->get_special_value(SV_SCRATCH));
		}
	}

	return 0;
}
749
// Walk all decoded CF nodes in program order and build the structured
// IR: prepare clause bodies, turn loops/jumps into region nodes, unroll
// burst exports/memory ops, and attach dependency tokens for geometry
// emits and scratch accesses.
int bc_parser::prepare_ir() {

	for(id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) {
		cf_node *c = *I;

		// cf_map is sparse (indexed by CF id); skip holes
		if (!c)
			continue;

		unsigned flags = c->bc.op_ptr->flags;

		if (flags & CF_ALU) {
			prepare_alu_clause(c);
		} else if (flags & CF_FETCH) {
			prepare_fetch_clause(c);
		} else if (c->bc.op == CF_OP_CALL_FS) {
			sh->init_call_fs(c);
			c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
		} else if (flags & CF_LOOP_START) {
			// pushes the new region onto loop_stack
			prepare_loop(c);
		} else if (c->bc.op == CF_OP_JUMP) {
			prepare_if(c);
		} else if (c->bc.op == CF_OP_LOOP_END) {
			loop_stack.pop();
		} else if (c->bc.op == CF_OP_LOOP_CONTINUE) {
			// replace CONTINUE with a repeat node targeting the
			// innermost loop region; preceding siblings move inside it
			assert(!loop_stack.empty());
			repeat_node *rep = sh->create_repeat(loop_stack.top());
			if (c->parent->first != c)
				rep->move(c->parent->first, c);
			c->replace_with(rep);
			sh->simplify_dep_rep(rep);
		} else if (c->bc.op == CF_OP_LOOP_BREAK) {
			// same structure as CONTINUE, but departing the region
			assert(!loop_stack.empty());
			depart_node *dep = sh->create_depart(loop_stack.top());
			if (c->parent->first != c)
				dep->move(c->parent->first, c);
			c->replace_with(dep);
			sh->simplify_dep_rep(dep);
		} else if (flags & CF_EXP) {

			// unroll burst exports

			assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE);

			c->bc.set_op(CF_OP_EXPORT);

			// end_of_program is restored on the last unrolled export
			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {
				c->src.resize(4);

				for(int s = 0; s < 4; ++s) {
					switch (c->bc.sel[s]) {
					case SEL_0:
						c->src[s] = sh->get_const_value(0.0f);
						break;
					case SEL_1:
						c->src[s] = sh->get_const_value(1.0f);
						break;
					case SEL_MASK:
						break;
					default:
						if (c->bc.sel[s] <= SEL_W)
							c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr,
							                              c->bc.sel[s], false);
						else
							assert(!"invalid src_sel for export");
					}
				}

				if (!burst_count--)
					break;

				// each unrolled export reads the next GPR and writes
				// the next array_base slot
				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;
				++cf_next->bc.array_base;

				c->insert_after(cf_next);
				c = cf_next;

			} while (1);

			c->bc.end_of_program = eop;
		} else if (flags & CF_MEM) {

			// unroll burst memory ops, analogous to the export case
			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {

				// r600 scratch reads (type 2/3) populate dst values;
				// everything else reads srcs from rw_gpr
				if (ctx.hw_class == HW_CLASS_R600 && c->bc.op == CF_OP_MEM_SCRATCH &&
				    (c->bc.type == 2 || c->bc.type == 3)) {
					c->dst.resize(4);
					for(int s = 0; s < 4; ++s) {
						if (c->bc.comp_mask & (1 << s))
							c->dst[s] =
									sh->get_gpr_value(true, c->bc.rw_gpr, s, false);
					}
				} else {
					c->src.resize(4);


					for(int s = 0; s < 4; ++s) {
						if (c->bc.comp_mask & (1 << s))
							c->src[s] =
									sh->get_gpr_value(true, c->bc.rw_gpr, s, false);
					}
				}

				if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) { // indexed write
					// index operand lives in src[4..6]
					c->src.resize(8);
					for(int s = 0; s < 3; ++s) {
						c->src[4 + s] =
								sh->get_gpr_value(true, c->bc.index_gpr, s, false);
					}

					// FIXME probably we can relax it a bit
					c->flags |= NF_DONT_HOIST | NF_DONT_MOVE;
				}

				if (flags & CF_EMIT) {
					// Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX
					c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
					c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
					if (sh->target == TARGET_ES) {
						// For ES shaders this is an export
						c->flags |= NF_DONT_KILL;
					}
				}
				else if (c->bc.op == CF_OP_MEM_SCRATCH) {
					// order scratch accesses through the SV_SCRATCH token
					c->src.push_back(sh->get_special_value(SV_SCRATCH));
					c->dst.push_back(sh->get_special_value(SV_SCRATCH));
				}

				if (!burst_count--)
					break;

				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;

				// FIXME is it correct?
				cf_next->bc.array_base += cf_next->bc.elem_size + 1;

				c->insert_after(cf_next);
				c = cf_next;
			} while (1);

			c->bc.end_of_program = eop;

		} else if (flags & CF_EMIT) {
			/* quick peephole */
			// fold CUT_VERTEX immediately following a matching
			// EMIT_VERTEX into a single EMIT_CUT_VERTEX
			cf_node *prev = static_cast<cf_node *>(c->prev);
			if (c->bc.op == CF_OP_CUT_VERTEX &&
			    prev && prev->is_valid() &&
			    prev->bc.op == CF_OP_EMIT_VERTEX &&
			    c->bc.count == prev->bc.count) {
				prev->bc.set_op(CF_OP_EMIT_CUT_VERTEX);
				prev->bc.end_of_program = c->bc.end_of_program;
				c->remove();
			}
			else {
				c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;

				// serialize emits through the SV_GEOMETRY_EMIT token
				c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
				c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
			}
		} else if (c->bc.op == CF_OP_WAIT_ACK) {
			// WAIT_ACK is ordered against scratch accesses
			c->src.push_back(sh->get_special_value(SV_SCRATCH));
			c->dst.push_back(sh->get_special_value(SV_SCRATCH));
		}
	}

	// every LOOP_START must have been matched by a LOOP_END
	assert(loop_stack.empty());
	return 0;
}
933
prepare_loop(cf_node * c)934 int bc_parser::prepare_loop(cf_node* c) {
935 assert(c->bc.addr-1 < cf_map.size());
936
937 cf_node *end = cf_map[c->bc.addr - 1];
938 assert(end->bc.op == CF_OP_LOOP_END);
939 assert(c->parent == end->parent);
940
941 region_node *reg = sh->create_region();
942 repeat_node *rep = sh->create_repeat(reg);
943
944 reg->push_back(rep);
945 c->insert_before(reg);
946 rep->move(c, end->next);
947
948 reg->src_loop = true;
949
950 loop_stack.push(reg);
951 return 0;
952 }
953
// Convert a JUMP (+ optional ELSE) into a structured region:
//   region -> depart(then-part + if) ; if -> depart(else-part)
// so both branches are expressed as departures from the same region.
int bc_parser::prepare_if(cf_node* c) {
	assert(c->bc.addr-1 < cf_map.size());
	cf_node *c_else = NULL, *end = cf_map[c->bc.addr];

	if (!end)
		return 0; // not quite sure how this happens, malformed input?

	BCP_DUMP(
		sblog << "parsing JUMP @" << c->bc.id;
		sblog << "\n";
	);

	if (end->bc.op == CF_OP_ELSE) {
		BCP_DUMP(
			sblog << "  found ELSE : ";
			dump::dump_op(end);
			sblog << "\n";
		);

		c_else = end;
		// ELSE's addr points at the end of the construct
		end = cf_map[c_else->bc.addr];
	} else {
		BCP_DUMP(
			sblog << "  no else\n";
		);

		c_else = end;
	}

	// ignore targets that left the current nesting level
	if (c_else->parent != c->parent)
		c_else = NULL;

	if (end && end->parent != c->parent)
		end = NULL;

	region_node *reg = sh->create_region();

	depart_node *dep2 = sh->create_depart(reg);
	depart_node *dep = sh->create_depart(reg);
	if_node *n_if = sh->create_if();

	c->insert_before(reg);

	// else-part [c_else, end) goes into dep; then-part [c, end) plus the
	// if node itself go into dep2
	if (c_else != end)
		dep->move(c_else, end);
	dep2->move(c, end);

	reg->push_back(dep);
	dep->push_front(n_if);
	n_if->push_back(dep2);

	// JUMP branches on the current exec mask
	n_if->cond = sh->get_special_value(SV_EXEC_MASK);

	return 0;
}
1009
1010
1011 } // namespace r600_sb
1012