/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define BCP_DEBUG 0

#if BCP_DEBUG
#define BCP_DUMP(q) do { q } while (0)
#else
#define BCP_DUMP(q)
#endif

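/* BCP_DUMP wraps debug-only dump statements so they compile away entirely
 * when BCP_DEBUG is 0, e.g. (as used in prepare_if() below):
 *
 *   BCP_DUMP( sblog << "parsing JUMP @" << c->bc.id << "\n"; );
 */
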
#include "r600_pipe.h"
#include "r600_shader.h"
#include "eg_sq.h" // CM_V_SQ_MOVA_DST_CF_IDX0/1

#include <stack>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
#include "util/macros.h"

namespace r600_sb {

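/* First pass: determine the shader target from the pipe shader state (or
 * fall back to TARGET_COMPUTE/TARGET_FETCH when there is no pshader), then
 * decode the raw bytecode into IR nodes via decode_shader(). */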
int bc_parser::decode() {

	dw = bc->bytecode;
	bc_ndw = bc->ndw;
	max_cf = 0;

	dec = new bc_decoder(ctx, dw, bc_ndw);

	shader_target t = TARGET_UNKNOWN;

	if (pshader) {
		switch (bc->type) {
		case PIPE_SHADER_FRAGMENT: t = TARGET_PS; break;
		case PIPE_SHADER_VERTEX:
			t = pshader->vs_as_ls ? TARGET_LS : (pshader->vs_as_es ? TARGET_ES : TARGET_VS);
			break;
		case PIPE_SHADER_GEOMETRY: t = TARGET_GS; break;
		case PIPE_SHADER_COMPUTE: t = TARGET_COMPUTE; break;
		case PIPE_SHADER_TESS_CTRL: t = TARGET_HS; break;
		case PIPE_SHADER_TESS_EVAL: t = pshader->tes_as_es ? TARGET_ES : TARGET_VS; break;
		default: assert(!"unknown shader target"); return -1; break;
		}
	} else {
		if (bc->type == PIPE_SHADER_COMPUTE)
			t = TARGET_COMPUTE;
		else
			t = TARGET_FETCH;
	}

	sh = new shader(ctx, t, bc->debug_id);
	sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE);

	int r = decode_shader();

	delete dec;

	sh->ngpr = bc->ngpr;
	sh->nstack = bc->nstack;

	return r;
}

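/* decode_cf() advances i (a dword offset) past each CF instruction and its
 * clauses. Decoding stops only once an end-of-program CF has been seen AND
 * everything up to the highest branch target (max_cf) has been decoded, so
 * code reached by a forward jump past an EOP marker is not missed. */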
int bc_parser::decode_shader() {
	int r = 0;
	unsigned i = 0;
	bool eop = false;

	sh->init();

	do {
		eop = false;
		if ((r = decode_cf(i, eop)))
			return r;

	} while (!eop || (i >> 1) < max_cf);

	return 0;
}

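/* Second pass over the decoded program: register the shader's declarations
 * (inputs, GPR arrays), then build the structured IR proper. */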
int bc_parser::prepare() {
	int r = 0;
	if ((r = parse_decls()))
		return r;
	if ((r = prepare_ir()))
		return r;
	return 0;
}

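/* Declare the values that are live on shader entry: GPR arrays for relative
 * addressing, and the preloaded input registers for each shader target. */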
int bc_parser::parse_decls() {

	if (!pshader) {
		if (gpr_reladdr)
			sh->add_gpr_array(0, bc->ngpr, 0x0F);

		// compute shaders have some values preloaded in R0, R1
		sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */);
		sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */);
		return 0;
	}

	if (pshader->indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER))) {

		assert(pshader->num_arrays);

		if (pshader->num_arrays) {
			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
				r600_shader_array &a = pshader->arrays[i];
				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
			}
		} else {
			sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
		}
	}

	// GS inputs can add indirect addressing
	if (sh->target == TARGET_GS) {
		if (pshader->num_arrays) {
			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
				r600_shader_array &a = pshader->arrays[i];
				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
			}
		}
	}

	if (sh->target == TARGET_VS || sh->target == TARGET_ES || sh->target == TARGET_HS || sh->target == TARGET_LS)
		sh->add_input(0, 1, 0x0F);
	else if (sh->target == TARGET_GS) {
		sh->add_input(0, 1, 0x0F);
		sh->add_input(1, 1, 0x0F);
	} else if (sh->target == TARGET_COMPUTE) {
		sh->add_input(0, 1, 0x0F);
		sh->add_input(1, 1, 0x0F);
	}

	bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
			&& sh->target == TARGET_PS;

	bool ij_interpolators[6];
	memset(ij_interpolators, 0, sizeof(ij_interpolators));

	for (unsigned i = 0; i < pshader->ninput; ++i) {
		r600_shader_io & in = pshader->input[i];
		bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
		sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F);
		if (ps_interp && in.spi_sid) {
			int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location);
			if (k >= 0)
				ij_interpolators[k] |= true;
		}
	}

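	/* Each active interpolator occupies two consecutive GPR channels (an
	 * i/j pair), packed upward from GPR0.x, so the 2*num_ij-bit mask is
	 * consumed four channels (one GPR) at a time below. */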
	if (ps_interp) {
		/* add the egcm ij interpolators to live inputs */
		unsigned num_ij = 0;
		for (unsigned i = 0; i < ARRAY_SIZE(ij_interpolators); i++) {
			num_ij += ij_interpolators[i];
		}

		unsigned mask = (1 << (2 * num_ij)) - 1;
		unsigned gpr = 0;

		while (mask) {
			sh->add_input(gpr, true, mask & 0x0F);
			++gpr;
			mask >>= 4;
		}
	}

	return 0;
}

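/* Decode a single CF instruction at dword offset i, register it in cf_map
 * (indexed by CF id = i/2), and decode any attached ALU or fetch clause.
 * Branch targets raise max_cf so decode_shader() knows how far the program
 * extends. */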
int bc_parser::decode_cf(unsigned &i, bool &eop) {

	int r;

	cf_node *cf = sh->create_cf();
	sh->root->push_back(cf);

	unsigned id = i >> 1;

	cf->bc.id = id;

	if (cf_map.size() < id + 1)
		cf_map.resize(id + 1);

	cf_map[id] = cf;

	if ((r = dec->decode_cf(i, cf->bc)))
		return r;

	cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;

	if (flags & CF_ALU) {
		if ((r = decode_alu_clause(cf)))
			return r;
	} else if (flags & CF_FETCH) {
		if ((r = decode_fetch_clause(cf)))
			return r;
	} else if (flags & CF_EXP) {
		if (cf->bc.rw_rel)
			gpr_reladdr = true;
		assert(!cf->bc.rw_rel);
	} else if (flags & CF_MEM) {
		if (cf->bc.rw_rel)
			gpr_reladdr = true;
		assert(!cf->bc.rw_rel);
	} else if (flags & CF_BRANCH) {
		if (cf->bc.addr > max_cf)
			max_cf = cf->bc.addr;
	}

	eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END ||
			cf->bc.op == CF_OP_RET;
	return 0;
}

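/* An ALU clause is cf->bc.count + 1 64-bit instruction words starting at
 * cf->bc.addr. decode_alu_group() reports via gcnt how many of those words
 * each group consumed (instructions plus literal pairs). */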
int bc_parser::decode_alu_clause(cf_node* cf) {
	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;

	cf->subtype = NST_ALU_CLAUSE;

	cgroup = 0;
	memset(slots[0], 0, 5*sizeof(slots[0][0]));

	unsigned ng = 0;

	do {
		decode_alu_group(cf, i, gcnt);
		assert(gcnt <= cnt);
		cnt -= gcnt;
		ng++;
	} while (cnt);

	return 0;
}

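/* Decode one ALU group: up to five co-issued instructions terminated by the
 * 'last' bit, followed by the literal constants the group references. The
 * literals sit right after the group, one dword per used channel, padded to
 * an even number of dwords (i.e. to a whole 64-bit word). */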
int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
	int r;
	alu_node *n;
	alu_group_node *g = sh->create_alu_group();

	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
	gcnt = 0;

	unsigned literal_mask = 0;

	do {
		n = sh->create_alu();
		g->push_back(n);

		if ((r = dec->decode_alu(i, n->bc)))
			return r;

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		gcnt++;

	} while (gcnt <= 5 && !n->bc.last);

	assert(n->bc.last);

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		n = static_cast<alu_node*>(*I);

		if (n->bc.dst_rel)
			gpr_reladdr = true;

		for (int k = 0; k < n->bc.op_ptr->src_count; ++k) {
			bc_alu_src &src = n->bc.src[k];
			if (src.rel)
				gpr_reladdr = true;
			if (src.sel == ALU_SRC_LITERAL) {
				literal_mask |= (1 << src.chan);
				src.value.u = dw[i + src.chan];
			}
		}
	}

	unsigned literal_ndw = 0;
	while (literal_mask) {
		g->literals.push_back(dw[i + literal_ndw]);
		literal_ndw += 1;
		literal_mask >>= 1;
	}

	literal_ndw = (literal_ndw + 1) & ~1u;

	i += literal_ndw;
	gcnt += literal_ndw >> 1;

	cf->push_back(g);
	return 0;
}

int bc_parser::prepare_alu_clause(cf_node* cf) {

	// loop over alu groups
	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
		assert(I->subtype == NST_ALU_GROUP);
		alu_group_node *g = static_cast<alu_group_node*>(*I);
		prepare_alu_group(cf, g);
	}

	return 0;
}

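/* Helpers for tracking the values behind MOVA and SET_CF_IDX0/1: indexed
 * resource/kcache access goes through the CF_IDX registers, and the parser
 * records which IR value feeds them so that consuming instructions can
 * depend on that value directly. */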
void bc_parser::save_set_cf_index(value *val, unsigned idx)
{
	assert(idx <= 1);
	assert(val);
	cf_index_value[idx] = val;
}

value *bc_parser::get_cf_index_value(unsigned idx)
{
	assert(idx <= 1);
	assert(cf_index_value[idx]);
	return cf_index_value[idx];
}

void bc_parser::save_mova(alu_node *mova)
{
	assert(mova);
	this->mova = mova;
}

alu_node *bc_parser::get_mova()
{
	assert(mova);
	return mova;
}

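/* Convert one decoded ALU group into IR form: re-run slot assignment,
 * build the dst/src value lists, attach the special values implied by
 * predicate, kill, MOVA and LDS operations, and resolve PV/PS sources
 * against the previous group's slots. */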
int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {

	alu_node *n;

	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));

	for (node_iterator I = g->begin(), E = g->end();
			I != E; ++I) {
		n = static_cast<alu_node*>(*I);
		bool ubo_indexing[2] = {};

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		unsigned src_count = n->bc.op_ptr->src_count;

		if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
			n->flags |= NF_ALU_4SLOT;

		n->src.resize(src_count);

		unsigned flags = n->bc.op_ptr->flags;

		if (flags & AF_LDS) {
			bool need_rw = false, need_oqa = false, need_oqb = false;
			int ndst = 0, ncount = 0;

			/* all non-read operations have side effects */
			if (n->bc.op != LDS_OP2_LDS_READ2_RET &&
					n->bc.op != LDS_OP1_LDS_READ_REL_RET &&
					n->bc.op != LDS_OP1_LDS_READ_RET) {
				n->flags |= NF_DONT_KILL;
				ndst++;
				need_rw = true;
			}

			if (n->bc.op >= LDS_OP2_LDS_ADD_RET && n->bc.op <= LDS_OP1_LDS_USHORT_READ_RET) {
				need_oqa = true;
				ndst++;
			}

			if (n->bc.op == LDS_OP2_LDS_READ2_RET || n->bc.op == LDS_OP1_LDS_READ_REL_RET) {
				need_oqb = true;
				ndst++;
			}

			n->dst.resize(ndst);
			if (need_oqa)
				n->dst[ncount++] = sh->get_special_value(SV_LDS_OQA);
			if (need_oqb)
				n->dst[ncount++] = sh->get_special_value(SV_LDS_OQB);
			if (need_rw)
				n->dst[ncount++] = sh->get_special_value(SV_LDS_RW);

			n->flags |= NF_DONT_MOVE | NF_DONT_HOIST;

		} else if (flags & AF_PRED) {
			n->dst.resize(3);
			if (n->bc.update_pred)
				n->dst[1] = sh->get_special_value(SV_ALU_PRED);
			if (n->bc.update_exec_mask)
				n->dst[2] = sh->get_special_value(SV_EXEC_MASK);

			n->flags |= NF_DONT_HOIST;

		} else if (flags & AF_KILL) {

			n->dst.resize(2);
			n->dst[1] = sh->get_special_value(SV_VALID_MASK);
			sh->set_uses_kill();

			n->flags |= NF_DONT_HOIST | NF_DONT_MOVE |
					NF_DONT_KILL | NF_SCHEDULE_EARLY;

		} else {
			n->dst.resize(1);
		}

		if (n->bc.op == ALU_OP0_SET_CF_IDX0 || n->bc.op == ALU_OP0_SET_CF_IDX1) {
			// Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX
			// DCE will kill this op
			save_set_cf_index(get_mova()->src[0], n->bc.op == ALU_OP0_SET_CF_IDX1);
		} else if (flags & AF_MOVA) {

			n->dst[0] = sh->get_special_value(SV_AR_INDEX);
			save_mova(n);

			n->flags |= NF_DONT_HOIST;

		} else if ((n->bc.op_ptr->src_count == 3 || n->bc.write_mask) && !(flags & AF_LDS)) {
			assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X);

			value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan,
					n->bc.dst_rel);

			n->dst[0] = v;
		}

		if (n->bc.pred_sel) {
			sh->has_alu_predication = true;
			n->pred = sh->get_special_value(SV_ALU_PRED);
		}

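		/* Translate each source operand: literals become const values,
		 * PV/PS refer to the previous group's per-slot results, kcache
		 * selectors are resolved to (bank, addr, chan), selectors below
		 * MAX_GPR are plain GPR reads, then interpolation params, LDS
		 * queue pops, and the inline constants (0, 0.5, 1, 1 int, -1 int). */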
		for (unsigned s = 0; s < src_count; ++s) {
			bc_alu_src &src = n->bc.src[s];

			if (src.sel == ALU_SRC_LITERAL) {
				n->src[s] = sh->get_const_value(src.value);
			} else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
				unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
						SLOT_TRANS : src.chan;

				// XXX shouldn't happen but llvm backend uses PS on cayman
				if (prev_slot == SLOT_TRANS && ctx.is_cayman())
					prev_slot = SLOT_X;

				alu_node *prev_alu = slots[pgroup][prev_slot];

				assert(prev_alu);

				if (!prev_alu->dst[0]) {
					value * t = sh->create_temp_value();
					prev_alu->dst[0] = t;
				}

				value *d = prev_alu->dst[0];

				if (d->is_rel()) {
					d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr,
							prev_alu->bc.dst_chan,
							prev_alu->bc.dst_rel);
				}

				n->src[s] = d;
			} else if (ctx.is_kcache_sel(src.sel)) {
				unsigned sel = src.sel, kc_addr;
				unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1);

				bc_kcache &kc = cf->bc.kc[kc_set];
				kc_addr = (kc.addr << 4) + (sel & 0x1F);
				n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan, (alu_kcache_index_mode)kc.index_mode);

				if (kc.index_mode != KC_INDEX_NONE) {
					assert(kc.index_mode != KC_LOCK_LOOP);
					ubo_indexing[kc.index_mode - KC_INDEX_0] = true;
				}
			} else if (src.sel < MAX_GPR) {
				value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel);

				n->src[s] = v;

			} else if (src.sel >= ALU_SRC_PARAM_OFFSET) {
				// using slot for value channel because in fact the slot
				// determines the channel that is loaded by INTERP_LOAD_P0
				// (and maybe some others).
				// otherwise GVN will consider INTERP_LOAD_P0s with the same
				// param index as equal instructions and leave only one of them
				n->src[s] = sh->get_special_ro_value(sel_chan(src.sel,
						n->bc.slot));
			} else if (ctx.is_lds_oq(src.sel)) {
				switch (src.sel) {
				case ALU_SRC_LDS_OQ_A:
				case ALU_SRC_LDS_OQ_B:
					assert(!"Unsupported LDS queue access in SB");
					break;
				case ALU_SRC_LDS_OQ_A_POP:
					n->src[s] = sh->get_special_value(SV_LDS_OQA);
					break;
				case ALU_SRC_LDS_OQ_B_POP:
					n->src[s] = sh->get_special_value(SV_LDS_OQB);
					break;
				}
				n->flags |= NF_DONT_HOIST | NF_DONT_MOVE;

			} else {
				switch (src.sel) {
				case ALU_SRC_0:
					n->src[s] = sh->get_const_value(0);
					break;
				case ALU_SRC_0_5:
					n->src[s] = sh->get_const_value(0.5f);
					break;
				case ALU_SRC_1:
					n->src[s] = sh->get_const_value(1.0f);
					break;
				case ALU_SRC_1_INT:
					n->src[s] = sh->get_const_value(1);
					break;
				case ALU_SRC_M_1_INT:
					n->src[s] = sh->get_const_value(-1);
					break;
				default:
					n->src[s] = sh->get_special_ro_value(src.sel);
					break;
				}
			}
		}

		// add UBO index values if any as dependencies
		if (ubo_indexing[0]) {
			n->src.push_back(get_cf_index_value(0));
		}
		if (ubo_indexing[1]) {
			n->src.push_back(get_cf_index_value(1));
		}

		if ((flags & AF_MOVA) && (n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) &&
				ctx.is_cayman())
			// Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX
			save_set_cf_index(n->src[0], n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1);
	}

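	/* Multislot instructions (4-slot vector ops, and on Cayman scalar ops
	 * replicated across slots) are gathered into one alu_packed_node so
	 * that later passes handle them as a single instruction. */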
	// pack multislot instructions into alu_packed_node

	alu_packed_node *p = NULL;
	for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) {
		N = I + 1;
		alu_node *a = static_cast<alu_node*>(*I);
		unsigned sflags = a->bc.slot_flags;

		if (sflags == AF_4V || (ctx.is_cayman() && sflags == AF_S)) {
			if (!p)
				p = sh->create_alu_packed();

			a->remove();
			p->push_back(a);
		}
	}

	if (p) {
		g->push_front(p);

		if (p->count() == 3 && ctx.is_cayman()) {
			// cayman's scalar instruction that can use 3 or 4 slots

			// FIXME for simplicity we'll always add 4th slot,
			// but probably we might want to always remove 4th slot and make
			// sure that regalloc won't choose 'w' component for dst

			alu_node *f = static_cast<alu_node*>(p->first);
			alu_node *a = sh->create_alu();
			a->src = f->src;
			a->dst.resize(f->dst.size());
			a->bc = f->bc;
			a->bc.slot = SLOT_W;
			p->push_back(a);
		}
	}

	return 0;
}

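/* Decode a fetch clause: cf->bc.count + 1 fetch instructions starting at
 * cf->bc.addr. GDS clauses are tagged separately from TEX/VTX clauses. */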
int bc_parser::decode_fetch_clause(cf_node* cf) {
	int r;
	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;

	if (cf->bc.op_ptr->flags & FF_GDS)
		cf->subtype = NST_GDS_CLAUSE;
	else
		cf->subtype = NST_TEX_CLAUSE;

	while (cnt--) {
		fetch_node *n = sh->create_fetch();
		cf->push_back(n);
		if ((r = dec->decode_fetch(i, n->bc)))
			return r;
		if (n->bc.src_rel || n->bc.dst_rel)
			gpr_reladdr = true;

	}
	return 0;
}

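/* Build IR for a fetch clause. SET_GRADIENTS_H/V and SET_TEXTURE_OFFSETS
 * write hidden hardware state rather than GPRs, so their source values are
 * captured here and folded into the consuming fetch instructions as extra
 * sources; bc_finalizer re-emits the SET_* instructions later. */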
int bc_parser::prepare_fetch_clause(cf_node *cf) {

	vvec grad_v, grad_h, texture_offsets;

	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {

		fetch_node *n = static_cast<fetch_node*>(*I);
		assert(n->is_valid());

		unsigned flags = n->bc.op_ptr->flags;

		unsigned vtx = flags & FF_VTX;
		unsigned gds = flags & FF_GDS;
		unsigned num_src = gds ? 2 : vtx ? ctx.vtx_src_num : 4;

		n->dst.resize(4);

		if (gds) {
			n->flags |= NF_DONT_HOIST | NF_DONT_MOVE | NF_DONT_KILL;
		}
		if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) {
			sh->uses_gradients = true;
		}

		if (flags & (FF_SETGRAD | FF_SET_TEXTURE_OFFSETS)) {

			vvec *grad = NULL;

			switch (n->bc.op) {
			case FETCH_OP_SET_GRADIENTS_V:
				grad = &grad_v;
				break;
			case FETCH_OP_SET_GRADIENTS_H:
				grad = &grad_h;
				break;
			case FETCH_OP_SET_TEXTURE_OFFSETS:
				grad = &texture_offsets;
				break;
			default:
				assert(!"unexpected SET_GRAD instruction");
				return -1;
			}

			if (grad->empty())
				grad->resize(4);

			for (unsigned s = 0; s < 4; ++s) {
				unsigned sw = n->bc.src_sel[s];
				if (sw <= SEL_W)
					(*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr,
							sw, false);
				else if (sw == SEL_0)
					(*grad)[s] = sh->get_const_value(0.0f);
				else if (sw == SEL_1)
					(*grad)[s] = sh->get_const_value(1.0f);
			}
		} else {
			// Fold source values for instructions with hidden target values into
			// the instructions using them. The set instructions are later
			// re-emitted by bc_finalizer
			if (flags & FF_USEGRAD) {
				n->src.resize(12);
				std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4);
				std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8);
			} else if (flags & FF_USE_TEXTURE_OFFSETS) {
				n->src.resize(8);
				std::copy(texture_offsets.begin(), texture_offsets.end(), n->src.begin() + 4);
			} else {
				n->src.resize(4);
			}

			for (int s = 0; s < 4; ++s) {
				if (n->bc.dst_sel[s] != SEL_MASK)
					n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s, false);
				// NOTE: it doesn't matter here which components of the result we
				// are using, but original n->bc.dst_sel should be taken into
				// account when building the bytecode
			}
			for (unsigned s = 0; s < num_src; ++s) {
				if (n->bc.src_sel[s] <= SEL_W)
					n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr,
							n->bc.src_sel[s], false);
			}

			// Scheduler will emit the appropriate instructions to set CF_IDX0/1
			if (n->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) {
				n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode == V_SQ_CF_INDEX_1));
			}
			if (n->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
				n->src.push_back(get_cf_index_value(n->bc.resource_index_mode == V_SQ_CF_INDEX_1));
			}
		}
	}

	return 0;
}

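/* Convert the flat CF list into structured IR: loops become region/repeat
 * nodes, JUMP/ELSE pairs become region/depart/if trees, burst exports and
 * memory writes are unrolled into individual CF nodes, and geometry
 * EMIT/CUT instructions get explicit ordering dependencies through the
 * SV_GEOMETRY_EMIT special value. */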
int bc_parser::prepare_ir() {

	for (id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) {
		cf_node *c = *I;

		if (!c)
			continue;

		unsigned flags = c->bc.op_ptr->flags;

		if (flags & CF_ALU) {
			prepare_alu_clause(c);
		} else if (flags & CF_FETCH) {
			prepare_fetch_clause(c);
		} else if (c->bc.op == CF_OP_CALL_FS) {
			sh->init_call_fs(c);
			c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
		} else if (flags & CF_LOOP_START) {
			prepare_loop(c);
		} else if (c->bc.op == CF_OP_JUMP) {
			prepare_if(c);
		} else if (c->bc.op == CF_OP_LOOP_END) {
			loop_stack.pop();
		} else if (c->bc.op == CF_OP_LOOP_CONTINUE) {
			assert(!loop_stack.empty());
			repeat_node *rep = sh->create_repeat(loop_stack.top());
			if (c->parent->first != c)
				rep->move(c->parent->first, c);
			c->replace_with(rep);
			sh->simplify_dep_rep(rep);
		} else if (c->bc.op == CF_OP_LOOP_BREAK) {
			assert(!loop_stack.empty());
			depart_node *dep = sh->create_depart(loop_stack.top());
			if (c->parent->first != c)
				dep->move(c->parent->first, c);
			c->replace_with(dep);
			sh->simplify_dep_rep(dep);
		} else if (flags & CF_EXP) {

			// unroll burst exports

			assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE);

			c->bc.set_op(CF_OP_EXPORT);

			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {
				c->src.resize(4);

				for (int s = 0; s < 4; ++s) {
					switch (c->bc.sel[s]) {
					case SEL_0:
						c->src[s] = sh->get_const_value(0.0f);
						break;
					case SEL_1:
						c->src[s] = sh->get_const_value(1.0f);
						break;
					case SEL_MASK:
						break;
					default:
						if (c->bc.sel[s] <= SEL_W)
							c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr,
									c->bc.sel[s], false);
						else
							assert(!"invalid src_sel for export");
					}
				}

				if (!burst_count--)
					break;

				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;
				++cf_next->bc.array_base;

				c->insert_after(cf_next);
				c = cf_next;

			} while (1);

			c->bc.end_of_program = eop;
		} else if (flags & CF_MEM) {

			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {

				c->src.resize(4);

				for (int s = 0; s < 4; ++s) {
					if (c->bc.comp_mask & (1 << s))
						c->src[s] =
								sh->get_gpr_value(true, c->bc.rw_gpr, s, false);
				}

				if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) { // indexed write
					c->src.resize(8);
					for (int s = 0; s < 3; ++s) {
						c->src[4 + s] =
								sh->get_gpr_value(true, c->bc.index_gpr, s, false);
					}

					// FIXME probably we can relax it a bit
					c->flags |= NF_DONT_HOIST | NF_DONT_MOVE;
				}

				if (flags & CF_EMIT) {
					// Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX
					c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
					c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
					if (sh->target == TARGET_ES) {
						// For ES shaders this is an export
						c->flags |= NF_DONT_KILL;
					}
				}

				if (!burst_count--)
					break;

				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;

				// FIXME is it correct?
				cf_next->bc.array_base += cf_next->bc.elem_size + 1;

				c->insert_after(cf_next);
				c = cf_next;
			} while (1);

			c->bc.end_of_program = eop;

		} else if (flags & CF_EMIT) {
			/* quick peephole */
			cf_node *prev = static_cast<cf_node *>(c->prev);
			if (c->bc.op == CF_OP_CUT_VERTEX &&
					prev && prev->is_valid() &&
					prev->bc.op == CF_OP_EMIT_VERTEX &&
					c->bc.count == prev->bc.count) {
				prev->bc.set_op(CF_OP_EMIT_CUT_VERTEX);
				prev->bc.end_of_program = c->bc.end_of_program;
				c->remove();
			}
			else {
				c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;

				c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
				c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
			}
		}
	}

	assert(loop_stack.empty());
	return 0;
}

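/* LOOP_START's addr points one past the matching LOOP_END, so the end node
 * is cf_map[addr - 1]. The loop body [start, end] is wrapped in a region
 * containing a repeat node that targets it. */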
int bc_parser::prepare_loop(cf_node* c) {
	assert(c->bc.addr-1 < cf_map.size());

	cf_node *end = cf_map[c->bc.addr - 1];
	assert(end->bc.op == CF_OP_LOOP_END);
	assert(c->parent == end->parent);

	region_node *reg = sh->create_region();
	repeat_node *rep = sh->create_repeat(reg);

	reg->push_back(rep);
	c->insert_before(reg);
	rep->move(c, end->next);

	reg->src_loop = true;

	loop_stack.push(reg);
	return 0;
}

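/* A JUMP either targets the code right after the conditional block or an
 * ELSE that in turn targets the join point. Both shapes are rewritten into
 * a region with two depart nodes and an if node conditioned on the exec
 * mask: dep2 holds the 'then' code inside the if, dep holds the 'else'
 * code (empty when there is no ELSE). */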
int bc_parser::prepare_if(cf_node* c) {
	assert(c->bc.addr-1 < cf_map.size());
	cf_node *c_else = NULL, *end = cf_map[c->bc.addr];

	if (!end)
		return 0; // not quite sure how this happens, malformed input?

	BCP_DUMP(
		sblog << "parsing JUMP @" << c->bc.id;
		sblog << "\n";
	);

	if (end->bc.op == CF_OP_ELSE) {
		BCP_DUMP(
			sblog << " found ELSE : ";
			dump::dump_op(end);
			sblog << "\n";
		);

		c_else = end;
		end = cf_map[c_else->bc.addr];
	} else {
		BCP_DUMP(
			sblog << " no else\n";
		);

		c_else = end;
	}

	if (c_else->parent != c->parent)
		c_else = NULL;

	if (end && end->parent != c->parent)
		end = NULL;

	region_node *reg = sh->create_region();

	depart_node *dep2 = sh->create_depart(reg);
	depart_node *dep = sh->create_depart(reg);
	if_node *n_if = sh->create_if();

	c->insert_before(reg);

	if (c_else != end)
		dep->move(c_else, end);
	dep2->move(c, end);

	reg->push_back(dep);
	dep->push_front(n_if);
	n_if->push_back(dep2);

	n_if->cond = sh->get_special_value(SV_EXEC_MASK);

	return 0;
}


} // namespace r600_sb