1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #include "sb_bc.h"
28 #include "sb_shader.h"
29 #include "sb_pass.h"
30 #include "eg_sq.h" // V_SQ_CF_INDEX_0/1
31
32 namespace r600_sb {
33
// Characters printed for channel / swizzle selector values 0..7. In this file
// the table is indexed by ALU dst/src channels, export selectors and fetch
// dst/src selectors.
static const char* chans = "xyzw01?_";

// Names printed for a non-zero bank_swizzle value of an ALU instruction that
// is not in the trans slot (see bc_dump::dump(alu_node&)).
static const char* vec_bs[] = {
	"VEC_012",
	"VEC_021",
	"VEC_120",
	"VEC_102",
	"VEC_201",
	"VEC_210"
};

// Names printed for a non-zero bank_swizzle value of an ALU instruction in
// the trans slot (SLOT_TRANS).
static const char* scl_bs[] = {
	"SCL_210",
	"SCL_122",
	"SCL_212",
	"SCL_221"
};
43
44
visit(cf_node & n,bool enter)45 bool bc_dump::visit(cf_node& n, bool enter) {
46 if (enter) {
47
48 id = n.bc.id << 1;
49
50 if ((n.bc.op_ptr->flags & CF_ALU) && n.bc.is_alu_extended()) {
51 dump_dw(id, 2);
52 id += 2;
53 sblog << "\n";
54 }
55
56 dump_dw(id, 2);
57 dump(n);
58
59 if (n.bc.op_ptr->flags & CF_CLAUSE) {
60 id = n.bc.addr << 1;
61 new_group = 1;
62 }
63 }
64 return true;
65 }
66
visit(alu_node & n,bool enter)67 bool bc_dump::visit(alu_node& n, bool enter) {
68 if (enter) {
69 sblog << " ";
70 dump_dw(id, 2);
71
72 if (new_group) {
73 sblog.print_w(++group_index, 5);
74 sblog << " ";
75 } else
76 sblog << " ";
77
78 dump(n);
79 id += 2;
80
81 new_group = n.bc.last;
82 } else {
83 if (n.bc.last) {
84 alu_group_node *g =
85 static_cast<alu_group_node*>(n.get_alu_group_node());
86 assert(g);
87 for (unsigned k = 0; k < g->literals.size(); ++k) {
88 sblog << " ";
89 dump_dw(id, 1);
90 id += 1;
91 sblog << "\n";
92 }
93
94 id = (id + 1) & ~1u;
95 }
96 }
97
98 return false;
99 }
100
visit(fetch_node & n,bool enter)101 bool bc_dump::visit(fetch_node& n, bool enter) {
102 if (enter) {
103 sblog << " ";
104 dump_dw(id, 3);
105 dump(n);
106 id += 4;
107 }
108 return false;
109 }
110
fill_to(sb_ostringstream & s,int pos)111 static void fill_to(sb_ostringstream &s, int pos) {
112 int l = s.str().length();
113 if (l < pos)
114 s << std::string(pos-l, ' ');
115 }
116
dump(cf_node & n)117 void bc_dump::dump(cf_node& n) {
118 sb_ostringstream s;
119 s << n.bc.op_ptr->name;
120
121 if (n.bc.op_ptr->flags & CF_EXP) {
122 static const char *exp_type[] = {"PIXEL", "POS ", "PARAM"};
123
124 fill_to(s, 18);
125 s << " " << exp_type[n.bc.type] << " ";
126
127 if (n.bc.burst_count) {
128 sb_ostringstream s2;
129 s2 << n.bc.array_base << "-" << n.bc.array_base + n.bc.burst_count;
130 s.print_wl(s2.str(), 5);
131 s << " R" << n.bc.rw_gpr << "-" <<
132 n.bc.rw_gpr + n.bc.burst_count << ".";
133 } else {
134 s.print_wl(n.bc.array_base, 5);
135 s << " R" << n.bc.rw_gpr << ".";
136 }
137
138 for (int k = 0; k < 4; ++k)
139 s << chans[n.bc.sel[k]];
140
141 } else if (n.bc.op_ptr->flags & CF_MEM) {
142 static const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
143 "WRITE_IND_ACK"};
144 fill_to(s, 18);
145 s << " " << exp_type[n.bc.type] << " ";
146 s.print_wl(n.bc.array_base, 5);
147 s << " R" << n.bc.rw_gpr << ".";
148 for (int k = 0; k < 4; ++k)
149 s << ((n.bc.comp_mask & (1 << k)) ? chans[k] : '_');
150
151 if ((n.bc.op_ptr->flags & CF_RAT) && (n.bc.type & 1)) {
152 s << ", @R" << n.bc.index_gpr << ".xyz";
153 }
154 if ((n.bc.op_ptr->flags & CF_MEM) && (n.bc.type & 1)) {
155 s << ", @R" << n.bc.index_gpr << ".x";
156 }
157
158 s << " ES:" << n.bc.elem_size;
159
160 s << " OP:" << n.bc.rat_inst;
161
162 if (n.bc.mark)
163 s << " MARK";
164
165 } else {
166
167 if (n.bc.op_ptr->flags & CF_CLAUSE) {
168 s << " " << n.bc.count+1;
169 }
170
171 s << " @" << (n.bc.addr << 1);
172
173 if (n.bc.op_ptr->flags & CF_ALU) {
174 static const char *index_mode[] = {"", " CF_INDEX_0", " CF_INDEX_1"};
175
176 for (int k = 0; k < 4; ++k) {
177 bc_kcache &kc = n.bc.kc[k];
178 if (kc.mode) {
179 s << " KC" << k << "[CB" << kc.bank << ":" <<
180 (kc.addr << 4) << "-" <<
181 (((kc.addr + kc.mode) << 4) - 1) << index_mode[kc.index_mode] << "]";
182 }
183 }
184 }
185
186 if (n.bc.cond)
187 s << " CND:" << n.bc.cond;
188
189 if (n.bc.pop_count)
190 s << " POP:" << n.bc.pop_count;
191
192 if (n.bc.count && (n.bc.op_ptr->flags & CF_EMIT))
193 s << " STREAM" << n.bc.count;
194 }
195
196 if (!n.bc.barrier)
197 s << " NO_BARRIER";
198
199 if (n.bc.valid_pixel_mode)
200 s << " VPM";
201
202 if (n.bc.whole_quad_mode)
203 s << " WQM";
204
205 if (n.bc.end_of_program)
206 s << " EOP";
207
208 sblog << s.str() << "\n";
209 }
210
211
print_sel(sb_ostream & s,int sel,int rel,int index_mode,int need_brackets)212 static void print_sel(sb_ostream &s, int sel, int rel, int index_mode,
213 int need_brackets) {
214 if (rel && index_mode >= 5 && sel < 128)
215 s << "G";
216 if (rel || need_brackets) {
217 s << "[";
218 }
219 s << sel;
220 if (rel) {
221 if (index_mode == 0 || index_mode == 6)
222 s << "+AR";
223 else if (index_mode == 4)
224 s << "+AL";
225 }
226 if (rel || need_brackets) {
227 s << "]";
228 }
229 }
230
print_dst(sb_ostream & s,bc_alu & alu)231 static void print_dst(sb_ostream &s, bc_alu &alu)
232 {
233 unsigned sel = alu.dst_gpr;
234 char reg_char = 'R';
235 if (sel >= 128 - 4) { // clause temporary gpr
236 sel -= 128 - 4;
237 reg_char = 'T';
238 }
239
240 if (alu.write_mask || (alu.op_ptr->src_count == 3 && alu.op < LDS_OP2_LDS_ADD)) {
241 s << reg_char;
242 print_sel(s, sel, alu.dst_rel, alu.index_mode, 0);
243 } else {
244 s << "__";
245 }
246 s << ".";
247 s << chans[alu.dst_chan];
248 }
249
print_src(sb_ostream & s,bc_alu & alu,unsigned idx)250 static void print_src(sb_ostream &s, bc_alu &alu, unsigned idx)
251 {
252 bc_alu_src *src = &alu.src[idx];
253 unsigned sel = src->sel, need_sel = 1, need_chan = 1, need_brackets = 0;
254
255 if (src->neg)
256 s <<"-";
257 if (src->abs)
258 s <<"|";
259
260 if (sel < 128 - 4) {
261 s << "R";
262 } else if (sel < 128) {
263 s << "T";
264 sel -= 128 - 4;
265 } else if (sel < 160) {
266 s << "KC0";
267 need_brackets = 1;
268 sel -= 128;
269 } else if (sel < 192) {
270 s << "KC1";
271 need_brackets = 1;
272 sel -= 160;
273 } else if (sel >= 448) {
274 s << "Param";
275 sel -= 448;
276 } else if (sel >= 288) {
277 s << "KC3";
278 need_brackets = 1;
279 sel -= 288;
280 } else if (sel >= 256) {
281 s << "KC2";
282 need_brackets = 1;
283 sel -= 256;
284 } else {
285 need_sel = 0;
286 need_chan = 0;
287 switch (sel) {
288 case ALU_SRC_LDS_OQ_A:
289 s << "LDS_OQ_A";
290 need_chan = 1;
291 break;
292 case ALU_SRC_LDS_OQ_B:
293 s << "LDS_OQ_B";
294 need_chan = 1;
295 break;
296 case ALU_SRC_LDS_OQ_A_POP:
297 s << "LDS_OQ_A_POP";
298 need_chan = 1;
299 break;
300 case ALU_SRC_LDS_OQ_B_POP:
301 s << "LDS_OQ_B_POP";
302 need_chan = 1;
303 break;
304 case ALU_SRC_LDS_DIRECT_A:
305 s << "LDS_A["; s.print_zw_hex(src->value.u, 8); s << "]";
306 break;
307 case ALU_SRC_LDS_DIRECT_B:
308 s << "LDS_B["; s.print_zw_hex(src->value.u, 8); s << "]";
309 break;
310 case ALU_SRC_PS:
311 s << "PS";
312 break;
313 case ALU_SRC_PV:
314 s << "PV";
315 need_chan = 1;
316 break;
317 case ALU_SRC_LITERAL:
318 s << "[0x";
319 s.print_zw_hex(src->value.u, 8);
320 s << " " << src->value.f << "]";
321 need_chan = 1;
322 break;
323 case ALU_SRC_0_5:
324 s << "0.5";
325 break;
326 case ALU_SRC_M_1_INT:
327 s << "-1";
328 break;
329 case ALU_SRC_1_INT:
330 s << "1";
331 break;
332 case ALU_SRC_1:
333 s << "1.0";
334 break;
335 case ALU_SRC_0:
336 s << "0";
337 break;
338 case ALU_SRC_TIME_LO:
339 s << "TIME_LO";
340 break;
341 case ALU_SRC_TIME_HI:
342 s << "TIME_HI";
343 break;
344 case ALU_SRC_MASK_LO:
345 s << "MASK_LO";
346 break;
347 case ALU_SRC_MASK_HI:
348 s << "MASK_HI";
349 break;
350 case ALU_SRC_HW_WAVE_ID:
351 s << "HW_WAVE_ID";
352 break;
353 case ALU_SRC_SIMD_ID:
354 s << "SIMD_ID";
355 break;
356 case ALU_SRC_SE_ID:
357 s << "SE_ID";
358 break;
359 default:
360 s << "??IMM_" << sel;
361 break;
362 }
363 }
364
365 if (need_sel)
366 print_sel(s, sel, src->rel, alu.index_mode, need_brackets);
367
368 if (need_chan) {
369 s << "." << chans[src->chan];
370 }
371
372 if (src->abs)
373 s << "|";
374 }
dump(alu_node & n)375 void bc_dump::dump(alu_node& n) {
376 sb_ostringstream s;
377 static const char *omod_str[] = {"","*2","*4","/2"};
378 static const char *slots = "xyzwt";
379
380 s << (n.bc.update_exec_mask ? "M" : " ");
381 s << (n.bc.update_pred ? "P" : " ");
382 s << " ";
383 s << (n.bc.pred_sel>=2 ? (n.bc.pred_sel == 2 ? "0" : "1") : " ");
384 s << " ";
385
386 s << slots[n.bc.slot] << ": ";
387
388 s << n.bc.op_ptr->name << omod_str[n.bc.omod] << (n.bc.clamp ? "_sat" : "");
389 fill_to(s, 26);
390 s << " ";
391
392 print_dst(s, n.bc);
393 for (int k = 0; k < n.bc.op_ptr->src_count; ++k) {
394 s << (k ? ", " : ", ");
395 print_src(s, n.bc, k);
396 }
397
398 if (n.bc.bank_swizzle) {
399 fill_to(s, 55);
400 if (n.bc.slot == SLOT_TRANS)
401 s << " " << scl_bs[n.bc.bank_swizzle];
402 else
403 s << " " << vec_bs[n.bc.bank_swizzle];
404 }
405
406 if (ctx.is_cayman()) {
407 if (n.bc.op == ALU_OP1_MOVA_INT) {
408 static const char *mova_str[] = { " AR_X", " PC", " CF_IDX0", " CF_IDX1",
409 " Unknown MOVA_INT dest" };
410 s << mova_str[std::min(n.bc.dst_gpr, 4u)]; // CM_V_SQ_MOVA_DST_AR_*
411 }
412 }
413
414 if (n.bc.lds_idx_offset) {
415 s << " IDX_OFFSET:" << n.bc.lds_idx_offset;
416 }
417
418 sblog << s.str() << "\n";
419 }
420
init()421 int bc_dump::init() {
422 sb_ostringstream s;
423 s << "===== SHADER #" << sh.id;
424
425 if (sh.optimized)
426 s << " OPT";
427
428 s << " ";
429
430 std::string target = std::string(" ") +
431 sh.get_full_target_name() + " =====";
432
433 while (s.str().length() + target.length() < 80)
434 s << "=";
435
436 s << target;
437
438 sblog << "\n" << s.str() << "\n";
439
440 s.clear();
441
442 if (bc_data) {
443 s << "===== " << ndw << " dw ===== " << sh.ngpr
444 << " gprs ===== " << sh.nstack << " stack ";
445 }
446
447 while (s.str().length() < 80)
448 s << "=";
449
450 sblog << s.str() << "\n";
451
452 return 0;
453 }
454
done()455 int bc_dump::done() {
456 sb_ostringstream s;
457 s << "===== SHADER_END ";
458
459 while (s.str().length() < 80)
460 s << "=";
461
462 sblog << s.str() << "\n\n";
463
464 return 0;
465 }
466
bc_dump(shader & s,bytecode * bc)467 bc_dump::bc_dump(shader& s, bytecode* bc) :
468 vpass(s), bc_data(), ndw(), id(),
469 new_group(), group_index() {
470
471 if (bc) {
472 bc_data = bc->data();
473 ndw = bc->ndw();
474 }
475 }
476
dump(fetch_node & n)477 void bc_dump::dump(fetch_node& n) {
478 sb_ostringstream s;
479 static const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
480 unsigned gds = n.bc.op_ptr->flags & FF_GDS;
481 bool gds_has_ret = gds && n.bc.op >= FETCH_OP_GDS_ADD_RET &&
482 n.bc.op <= FETCH_OP_GDS_USHORT_READ_RET;
483 bool show_dst = !gds || (gds && gds_has_ret);
484
485 s << n.bc.op_ptr->name;
486 fill_to(s, 20);
487
488 if (show_dst) {
489 s << "R";
490 print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0);
491 s << ".";
492 for (int k = 0; k < 4; ++k)
493 s << chans[n.bc.dst_sel[k]];
494 s << ", ";
495 }
496
497 s << "R";
498 print_sel(s, n.bc.src_gpr, n.bc.src_rel, INDEX_LOOP, 0);
499 s << ".";
500
501 unsigned vtx = n.bc.op_ptr->flags & FF_VTX;
502 unsigned num_src_comp = gds ? 3 : vtx ? ctx.is_cayman() ? 2 : 1 : 4;
503
504 for (unsigned k = 0; k < num_src_comp; ++k)
505 s << chans[n.bc.src_sel[k]];
506
507 if (vtx && n.bc.offset[0]) {
508 s << " + " << n.bc.offset[0] << "b ";
509 }
510
511 if (!gds)
512 s << ", RID:" << n.bc.resource_id;
513
514 if (gds) {
515 s << " UAV:" << n.bc.uav_id;
516 if (n.bc.uav_index_mode)
517 s << " UAV:SQ_CF_INDEX_" << (n.bc.uav_index_mode - V_SQ_CF_INDEX_0);
518 if (n.bc.bcast_first_req)
519 s << " BFQ";
520 if (n.bc.alloc_consume)
521 s << " AC";
522 } else if (vtx) {
523 s << " " << fetch_type[n.bc.fetch_type];
524 if (!ctx.is_cayman() && n.bc.mega_fetch_count)
525 s << " MFC:" << n.bc.mega_fetch_count;
526 if (n.bc.fetch_whole_quad)
527 s << " FWQ";
528 if (ctx.is_egcm() && n.bc.resource_index_mode)
529 s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
530 if (ctx.is_egcm() && n.bc.sampler_index_mode)
531 s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
532
533 s << " UCF:" << n.bc.use_const_fields
534 << " FMT(DTA:" << n.bc.data_format
535 << " NUM:" << n.bc.num_format_all
536 << " COMP:" << n.bc.format_comp_all
537 << " MODE:" << n.bc.srf_mode_all << ")";
538 } else {
539 s << ", SID:" << n.bc.sampler_id;
540 if (n.bc.lod_bias)
541 s << " LB:" << n.bc.lod_bias;
542 s << " CT:";
543 for (unsigned k = 0; k < 4; ++k)
544 s << (n.bc.coord_type[k] ? "N" : "U");
545 for (unsigned k = 0; k < 3; ++k)
546 if (n.bc.offset[k])
547 s << " O" << chans[k] << ":" << n.bc.offset[k];
548 if (ctx.is_egcm() && n.bc.resource_index_mode)
549 s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
550 if (ctx.is_egcm() && n.bc.sampler_index_mode)
551 s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
552 }
553
554 if (n.bc.op_ptr->flags & FF_MEM) {
555 s << ", ELEM_SIZE:" << n.bc.elem_size;
556 if (n.bc.uncached)
557 s << ", UNCACHED";
558 if (n.bc.indexed)
559 s << ", INDEXED";
560 if (n.bc.burst_count)
561 s << ", BURST_COUNT:" << n.bc.burst_count;
562 s << ", ARRAY_BASE:" << n.bc.array_base;
563 s << ", ARRAY_SIZE:" << n.bc.array_size;
564 }
565
566 sblog << s.str() << "\n";
567 }
568
dump_dw(unsigned dw_id,unsigned count)569 void bc_dump::dump_dw(unsigned dw_id, unsigned count) {
570 if (!bc_data)
571 return;
572
573 assert(dw_id + count <= ndw);
574
575 sblog.print_zw(dw_id, 4);
576 sblog << " ";
577 while (count--) {
578 sblog.print_zw_hex(bc_data[dw_id++], 8);
579 sblog << " ";
580 }
581 }
582
583 } // namespace r600_sb
584