1 /*
2 * Copyright (C) 2019 Connor Abbott <cwabbott0@gmail.com>
3 * Copyright (C) 2019 Lyude Paul <thatslyude@gmail.com>
4 * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 */
25
26 #include <stdbool.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <assert.h>
30 #include <inttypes.h>
31 #include <string.h>
32
33 #include "bifrost.h"
34 #include "disassemble.h"
35 #include "bi_print_common.h"
36 #include "util/macros.h"
37
/* Return the bit-field [lo, high) of `word`, shifted down so that bit `lo`
 * of the input becomes bit 0 of the result. `lo` is inclusive and `high` is
 * exclusive.
 *
 * Preconditions: lo < 32 and high <= 32.
 */
static uint64_t bits(uint32_t word, unsigned lo, unsigned high)
{
        assert(lo < 32 && high <= 32);

        /* A 32-bit shift of a 32-bit value is undefined, so the "keep
         * everything up to the top bit" case is handled without a mask. */
        if (high == 32)
                return word >> lo;

        /* The mask is built from an unsigned constant: `1 << 31` on a plain
         * (signed) int overflows, which is undefined behaviour. */
        return (word & ((UINT32_C(1) << high) - 1)) >> lo;
}
45
/* One instruction pair as dispatched in a single cycle. The fields are
 * scattered across the clause words in format-dependent ways, so the clause
 * decoder gathers them into this struct before disassembling them. */
struct bifrost_alu_inst {
        /* Raw encoding handed to the FMA disassembler. */
        uint32_t fma_bits;
        /* Raw encoding handed to the ADD disassembler. */
        uint32_t add_bits;
        /* Raw register-block encoding; copied into a struct bifrost_regs. */
        uint64_t reg_bits;
};
54
/* Compute the register number printed for slot 0.
 *
 * When the ctrl field is zero, bit 0 of reg1 supplies bit 5 of the result.
 * Otherwise reg0 is used as-is if reg0 <= reg1, and as (63 - reg0) if not.
 */
static unsigned get_reg0(struct bifrost_regs regs)
{
        const unsigned r0 = regs.reg0;
        const unsigned r1 = regs.reg1;

        if (regs.ctrl != 0)
                return (r0 <= r1) ? r0 : 63 - r0;

        return r0 | ((r1 & 0x1) << 5);
}
62
/* Compute the register number printed for slot 1: reg1 is used as-is if
 * reg0 <= reg1, and as (63 - reg1) otherwise. */
static unsigned get_reg1(struct bifrost_regs regs)
{
        const unsigned r0 = regs.reg0;
        const unsigned r1 = regs.reg1;

        if (r0 > r1)
                return 63 - r1;

        return r1;
}
67
68 // this represents the decoded version of the ctrl register field.
69 struct bifrost_reg_ctrl {
70 bool read_reg0;
71 bool read_reg1;
72 struct bifrost_reg_ctrl_23 slot23;
73 bool clause_start;
74 };
75
dump_header(FILE * fp,struct bifrost_header header,bool verbose)76 static void dump_header(FILE *fp, struct bifrost_header header, bool verbose)
77 {
78 fprintf(fp, "ds(%du) ", header.dependency_slot);
79
80 if (header.staging_barrier)
81 fprintf(fp, "osrb ");
82
83 fprintf(fp, "%s ", bi_flow_control_name(header.flow_control));
84
85 if (header.suppress_inf)
86 fprintf(fp, "inf_suppress ");
87 if (header.suppress_nan)
88 fprintf(fp, "nan_suppress ");
89
90 if (header.flush_to_zero == BIFROST_FTZ_DX11)
91 fprintf(fp, "ftz_dx11 ");
92 else if (header.flush_to_zero == BIFROST_FTZ_ALWAYS)
93 fprintf(fp, "ftz_hsa ");
94 if (header.flush_to_zero == BIFROST_FTZ_ABRUPT)
95 fprintf(fp, "ftz_au ");
96
97 assert(!header.zero1);
98 assert(!header.zero2);
99
100 if (header.float_exceptions == BIFROST_EXCEPTIONS_DISABLED)
101 fprintf(fp, "fpe_ts ");
102 else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_DIVISION)
103 fprintf(fp, "fpe_pd ");
104 else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_SQRT)
105 fprintf(fp, "fpe_psqr ");
106
107 if (header.message_type)
108 fprintf(fp, "%s ", bi_message_type_name(header.message_type));
109
110 if (header.terminate_discarded_threads)
111 fprintf(fp, "td ");
112
113 if (header.next_clause_prefetch)
114 fprintf(fp, "ncph ");
115
116 if (header.next_message_type)
117 fprintf(fp, "next_%s ", bi_message_type_name(header.next_message_type));
118 if (header.dependency_wait != 0) {
119 fprintf(fp, "dwb(");
120 bool first = true;
121 for (unsigned i = 0; i < 8; i++) {
122 if (header.dependency_wait & (1 << i)) {
123 if (!first) {
124 fprintf(fp, ", ");
125 }
126 fprintf(fp, "%d", i);
127 first = false;
128 }
129 }
130 fprintf(fp, ") ");
131 }
132
133 fprintf(fp, "\n");
134 }
135
DecodeRegCtrl(FILE * fp,struct bifrost_regs regs,bool first)136 static struct bifrost_reg_ctrl DecodeRegCtrl(FILE *fp, struct bifrost_regs regs, bool first)
137 {
138 struct bifrost_reg_ctrl decoded = {};
139 unsigned ctrl;
140 if (regs.ctrl == 0) {
141 ctrl = regs.reg1 >> 2;
142 decoded.read_reg0 = !(regs.reg1 & 0x2);
143 decoded.read_reg1 = false;
144 } else {
145 ctrl = regs.ctrl;
146 decoded.read_reg0 = decoded.read_reg1 = true;
147 }
148
149 /* Modify control based on state */
150 if (first)
151 ctrl = (ctrl & 0x7) | ((ctrl & 0x8) << 1);
152 else if (regs.reg2 == regs.reg3)
153 ctrl += 16;
154
155 decoded.slot23 = bifrost_reg_ctrl_lut[ctrl];
156 ASSERTED struct bifrost_reg_ctrl_23 reserved = { 0 };
157 assert(memcmp(&decoded.slot23, &reserved, sizeof(reserved)));
158
159 return decoded;
160 }
161
dump_regs(FILE * fp,struct bifrost_regs srcs,bool first)162 static void dump_regs(FILE *fp, struct bifrost_regs srcs, bool first)
163 {
164 struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, srcs, first);
165 fprintf(fp, "# ");
166 if (ctrl.read_reg0)
167 fprintf(fp, "slot 0: r%d ", get_reg0(srcs));
168 if (ctrl.read_reg1)
169 fprintf(fp, "slot 1: r%d ", get_reg1(srcs));
170
171 const char *slot3_fma = ctrl.slot23.slot3_fma ? "FMA" : "ADD";
172
173 if (ctrl.slot23.slot2 == BIFROST_OP_WRITE)
174 fprintf(fp, "slot 2: r%d (write FMA) ", srcs.reg2);
175 else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_LO)
176 fprintf(fp, "slot 2: r%d (write lo FMA) ", srcs.reg2);
177 else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_HI)
178 fprintf(fp, "slot 2: r%d (write hi FMA) ", srcs.reg2);
179 else if (ctrl.slot23.slot2 == BIFROST_OP_READ)
180 fprintf(fp, "slot 2: r%d (read) ", srcs.reg2);
181
182 if (ctrl.slot23.slot3 == BIFROST_OP_WRITE)
183 fprintf(fp, "slot 3: r%d (write %s) ", srcs.reg3, slot3_fma);
184 else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_LO)
185 fprintf(fp, "slot 3: r%d (write lo %s) ", srcs.reg3, slot3_fma);
186 else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_HI)
187 fprintf(fp, "slot 3: r%d (write hi %s) ", srcs.reg3, slot3_fma);
188
189 if (srcs.fau_idx) {
190 if (srcs.fau_idx & 0x80) {
191 fprintf(fp, "uniform: u%d", (srcs.fau_idx & 0x7f) * 2);
192 }
193 }
194
195 fprintf(fp, "\n");
196 }
197
198 static void
bi_disasm_dest_mask(FILE * fp,enum bifrost_reg_op op)199 bi_disasm_dest_mask(FILE *fp, enum bifrost_reg_op op)
200 {
201 if (op == BIFROST_OP_WRITE_LO)
202 fprintf(fp, ".h0");
203 else if (op == BIFROST_OP_WRITE_HI)
204 fprintf(fp, ".h1");
205 }
206
207 void
bi_disasm_dest_fma(FILE * fp,struct bifrost_regs * next_regs,bool last)208 bi_disasm_dest_fma(FILE *fp, struct bifrost_regs *next_regs, bool last)
209 {
210 /* If this is the last instruction, next_regs points to the first reg entry. */
211 struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last);
212 if (ctrl.slot23.slot2 >= BIFROST_OP_WRITE) {
213 fprintf(fp, "r%u:t0", next_regs->reg2);
214 bi_disasm_dest_mask(fp, ctrl.slot23.slot2);
215 } else if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && ctrl.slot23.slot3_fma) {
216 fprintf(fp, "r%u:t0", next_regs->reg3);
217 bi_disasm_dest_mask(fp, ctrl.slot23.slot3);
218 } else
219 fprintf(fp, "t0");
220 }
221
222 void
bi_disasm_dest_add(FILE * fp,struct bifrost_regs * next_regs,bool last)223 bi_disasm_dest_add(FILE *fp, struct bifrost_regs *next_regs, bool last)
224 {
225 /* If this is the last instruction, next_regs points to the first reg entry. */
226 struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last);
227
228 if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && !ctrl.slot23.slot3_fma) {
229 fprintf(fp, "r%u:t1", next_regs->reg3);
230 bi_disasm_dest_mask(fp, ctrl.slot23.slot3);
231 } else
232 fprintf(fp, "t1");
233 }
234
/* Print a 32-bit immediate as hex, followed by a comment showing the same
 * bit pattern reinterpreted as a float. */
static void dump_const_imm(FILE *fp, uint32_t imm)
{
        float as_float;

        /* Reinterpret the bits, not the value */
        memcpy(&as_float, &imm, sizeof(as_float));

        fprintf(fp, "0x%08x /* %f */", imm, as_float);
}
244
245 static void
dump_pc_imm(FILE * fp,uint64_t imm,enum bi_constmod mod,bool high32)246 dump_pc_imm(FILE *fp, uint64_t imm, enum bi_constmod mod, bool high32)
247 {
248 /* 60-bit sign-extend */
249 uint64_t zx64 = (imm << 4);
250 int64_t sx64 = zx64;
251 sx64 >>= 4;
252
253 /* 28-bit sign extend x 2 */
254 uint32_t imm32[2] = { (uint32_t) imm, (uint32_t) (imm >> 32) };
255 uint32_t zx32[2] = { imm32[0] << 4, imm32[1] << 4 };
256 int32_t sx32[2] = { zx32[0], zx32[1] };
257 sx32[0] >>= 4;
258 sx32[1] >>= 4;
259
260 switch (mod) {
261 case BI_CONSTMOD_PC_LO:
262 fprintf(fp, "(pc + %" PRId64 ")%s",
263 sx64,
264 high32 ? " >> 32" : "");
265 break;
266 case BI_CONSTMOD_PC_HI:
267 if (high32)
268 fprintf(fp, "(pc + %d)", sx32[1]);
269 else
270 dump_const_imm(fp, imm);
271 break;
272 case BI_CONSTMOD_PC_LO_HI:
273 fprintf(fp, "(pc + %d)", sx32[high32]);
274 break;
275 default:
276 unreachable("Invalid PC modifier");
277 }
278 }
279
/* Map the index of an embedded constant in FAU-RAM to the index of the
 * embedded constant itself. The mapping is not monotonic: FAU values 4..7
 * select constants 0..3 and FAU values 2..3 select constants 4..5. FAU
 * values 0 and 1 have no embedded constant and trip the assertion. */

static unsigned
const_fau_to_idx(unsigned fau_value)
{
        static const unsigned fau_to_const[8] = {
                [0] = ~0u, [1] = ~0u,
                [2] = 4,   [3] = 5,
                [4] = 0,   [5] = 1,
                [6] = 2,   [7] = 3,
        };

        const unsigned const_idx = fau_to_const[fau_value];

        assert(const_idx < 6);
        return const_idx;
}
293
dump_fau_src(FILE * fp,struct bifrost_regs srcs,struct bi_constants * consts,bool high32)294 static void dump_fau_src(FILE *fp, struct bifrost_regs srcs, struct bi_constants *consts, bool high32)
295 {
296 if (srcs.fau_idx & 0x80) {
297 unsigned uniform = (srcs.fau_idx & 0x7f);
298 fprintf(fp, "u%d.w%d", uniform, high32);
299 } else if (srcs.fau_idx >= 0x20) {
300 unsigned idx = const_fau_to_idx(srcs.fau_idx >> 4);
301 uint64_t imm = consts->raw[idx];
302 imm |= (srcs.fau_idx & 0xf);
303 if (consts->mods[idx] != BI_CONSTMOD_NONE)
304 dump_pc_imm(fp, imm, consts->mods[idx], high32);
305 else if (high32)
306 dump_const_imm(fp, imm >> 32);
307 else
308 dump_const_imm(fp, imm);
309 } else {
310 switch (srcs.fau_idx) {
311 case 0:
312 fprintf(fp, "#0");
313 break;
314 case 1:
315 fprintf(fp, "lane_id");
316 break;
317 case 2:
318 fprintf(fp, "warp_id");
319 break;
320 case 3:
321 fprintf(fp, "core_id");
322 break;
323 case 4:
324 fprintf(fp, "framebuffer_size");
325 break;
326 case 5:
327 fprintf(fp, "atest_datum");
328 break;
329 case 6:
330 fprintf(fp, "sample");
331 break;
332 case 8:
333 case 9:
334 case 10:
335 case 11:
336 case 12:
337 case 13:
338 case 14:
339 case 15:
340 fprintf(fp, "blend_descriptor_%u", (unsigned) srcs.fau_idx - 8);
341 break;
342 default:
343 fprintf(fp, "XXX - reserved%u", (unsigned) srcs.fau_idx);
344 break;
345 }
346
347 if (high32)
348 fprintf(fp, ".y");
349 else
350 fprintf(fp, ".x");
351 }
352 }
353
354 void
dump_src(FILE * fp,unsigned src,struct bifrost_regs srcs,struct bi_constants * consts,bool isFMA)355 dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs, struct bi_constants *consts, bool isFMA)
356 {
357 switch (src) {
358 case 0:
359 fprintf(fp, "r%d", get_reg0(srcs));
360 break;
361 case 1:
362 fprintf(fp, "r%d", get_reg1(srcs));
363 break;
364 case 2:
365 fprintf(fp, "r%d", srcs.reg2);
366 break;
367 case 3:
368 if (isFMA)
369 fprintf(fp, "#0");
370 else
371 fprintf(fp, "t"); // i.e. the output of FMA this cycle
372 break;
373 case 4:
374 dump_fau_src(fp, srcs, consts, false);
375 break;
376 case 5:
377 dump_fau_src(fp, srcs, consts, true);
378 break;
379 case 6:
380 fprintf(fp, "t0");
381 break;
382 case 7:
383 fprintf(fp, "t1");
384 break;
385 }
386 }
387
/* Tables for decoding M1, or if M1 == 7, M2 respectively.
 *
 * XXX: It's not clear if the third entry of M2_table, corresponding to
 * (M1, M2) = (7, 2), should have PC_LO_HI in the EC1 slot, or if it's a weird
 * hybrid mode. This needs testing, but no code should ever actually use this
 * mode. */
394
395 static const enum bi_constmod M1_table[7][2] = {
396 { BI_CONSTMOD_NONE, BI_CONSTMOD_NONE },
397 { BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE },
398 { BI_CONSTMOD_PC_LO, BI_CONSTMOD_PC_LO },
399 { ~0, ~0 },
400 { BI_CONSTMOD_PC_HI, BI_CONSTMOD_NONE },
401 { BI_CONSTMOD_PC_HI, BI_CONSTMOD_PC_HI },
402 { BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE },
403 };
404
405 static const enum bi_constmod M2_table[4][2] = {
406 { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_NONE },
407 { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI },
408 { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_LO_HI },
409 { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI },
410 };
411
412 static void
decode_M(enum bi_constmod * mod,unsigned M1,unsigned M2,bool single)413 decode_M(enum bi_constmod *mod, unsigned M1, unsigned M2, bool single)
414 {
415 if (M1 >= 8) {
416 mod[0] = BI_CONSTMOD_NONE;
417
418 if (!single)
419 mod[1] = BI_CONSTMOD_NONE;
420
421 return;
422 } else if (M1 == 7) {
423 assert(M2 < 4);
424 memcpy(mod, M2_table[M2], sizeof(*mod) * (single ? 1 : 2));
425 } else {
426 assert(M1 != 3);
427 memcpy(mod, M1_table[M1], sizeof(*mod) * (single ? 1 : 2));
428 }
429 }
430
dump_clause(FILE * fp,uint32_t * words,unsigned * size,unsigned offset,bool verbose)431 static bool dump_clause(FILE *fp, uint32_t *words, unsigned *size, unsigned offset, bool verbose)
432 {
433 // State for a decoded clause
434 struct bifrost_alu_inst instrs[8] = {};
435 struct bi_constants consts = {};
436 unsigned num_instrs = 0;
437 unsigned num_consts = 0;
438 uint64_t header_bits = 0;
439 bool stopbit = false;
440
441 unsigned i;
442 for (i = 0; ; i++, words += 4) {
443 if (verbose) {
444 fprintf(fp, "# ");
445 for (int j = 0; j < 4; j++)
446 fprintf(fp, "%08x ", words[3 - j]); // low bit on the right
447 fprintf(fp, "\n");
448 }
449 unsigned tag = bits(words[0], 0, 8);
450
451 // speculatively decode some things that are common between many formats, so we can share some code
452 struct bifrost_alu_inst main_instr = {};
453 // 20 bits
454 main_instr.add_bits = bits(words[2], 2, 32 - 13);
455 // 23 bits
456 main_instr.fma_bits = bits(words[1], 11, 32) | bits(words[2], 0, 2) << (32 - 11);
457 // 35 bits
458 main_instr.reg_bits = ((uint64_t) bits(words[1], 0, 11)) << 24 | (uint64_t) bits(words[0], 8, 32);
459
460 uint64_t const0 = bits(words[0], 8, 32) << 4 | (uint64_t) words[1] << 28 | bits(words[2], 0, 4) << 60;
461 uint64_t const1 = bits(words[2], 4, 32) << 4 | (uint64_t) words[3] << 32;
462
463 /* Z-bit */
464 bool stop = tag & 0x40;
465
466 if (verbose) {
467 fprintf(fp, "# tag: 0x%02x\n", tag);
468 }
469 if (tag & 0x80) {
470 /* Format 5 or 10 */
471 unsigned idx = stop ? 5 : 2;
472 main_instr.add_bits |= ((tag >> 3) & 0x7) << 17;
473 instrs[idx + 1] = main_instr;
474 instrs[idx].add_bits = bits(words[3], 0, 17) | ((tag & 0x7) << 17);
475 instrs[idx].fma_bits |= bits(words[2], 19, 32) << 10;
476 consts.raw[0] = bits(words[3], 17, 32) << 4;
477 } else {
478 bool done = false;
479 switch ((tag >> 3) & 0x7) {
480 case 0x0:
481 switch (tag & 0x7) {
482 case 0x3:
483 /* Format 1 */
484 main_instr.add_bits |= bits(words[3], 29, 32) << 17;
485 instrs[1] = main_instr;
486 num_instrs = 2;
487 done = stop;
488 break;
489 case 0x4:
490 /* Format 3 */
491 instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
492 instrs[2].fma_bits |= bits(words[2], 19, 32) << 10;
493 consts.raw[0] = const0;
494 decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true);
495 num_instrs = 3;
496 num_consts = 1;
497 done = stop;
498 break;
499 case 0x1:
500 case 0x5:
501 /* Format 4 */
502 instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
503 instrs[2].fma_bits |= bits(words[2], 19, 32) << 10;
504 main_instr.add_bits |= bits(words[3], 26, 29) << 17;
505 instrs[3] = main_instr;
506 if ((tag & 0x7) == 0x5) {
507 num_instrs = 4;
508 done = stop;
509 }
510 break;
511 case 0x6:
512 /* Format 8 */
513 instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
514 instrs[5].fma_bits |= bits(words[2], 19, 32) << 10;
515 consts.raw[0] = const0;
516 decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true);
517 num_instrs = 6;
518 num_consts = 1;
519 done = stop;
520 break;
521 case 0x7:
522 /* Format 9 */
523 instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
524 instrs[5].fma_bits |= bits(words[2], 19, 32) << 10;
525 main_instr.add_bits |= bits(words[3], 26, 29) << 17;
526 instrs[6] = main_instr;
527 num_instrs = 7;
528 done = stop;
529 break;
530 default:
531 unreachable("[INSTR_INVALID_ENC] Invalid tag bits");
532 }
533 break;
534 case 0x2:
535 case 0x3: {
536 /* Format 6 or 11 */
537 unsigned idx = ((tag >> 3) & 0x7) == 2 ? 4 : 7;
538 main_instr.add_bits |= (tag & 0x7) << 17;
539 instrs[idx] = main_instr;
540 consts.raw[0] |= (bits(words[2], 19, 32) | ((uint64_t) words[3] << 13)) << 19;
541 num_consts = 1;
542 num_instrs = idx + 1;
543 done = stop;
544 break;
545 }
546 case 0x4: {
547 /* Format 2 */
548 unsigned idx = stop ? 4 : 1;
549 main_instr.add_bits |= (tag & 0x7) << 17;
550 instrs[idx] = main_instr;
551 instrs[idx + 1].fma_bits |= bits(words[3], 22, 32);
552 instrs[idx + 1].reg_bits = bits(words[2], 19, 32) | (bits(words[3], 0, 22) << (32 - 19));
553 break;
554 }
555 case 0x1:
556 /* Format 0 - followed by constants */
557 num_instrs = 1;
558 done = stop;
559 /* fallthrough */
560 case 0x5:
561 /* Format 0 - followed by instructions */
562 header_bits = bits(words[2], 19, 32) | ((uint64_t) words[3] << (32 - 19));
563 main_instr.add_bits |= (tag & 0x7) << 17;
564 instrs[0] = main_instr;
565 break;
566 case 0x6:
567 case 0x7: {
568 /* Format 12 */
569 unsigned pos = tag & 0xf;
570 // note that `pos' encodes both the total number of
571 // instructions and the position in the constant stream,
572 // presumably because decoded constants and instructions
573 // share a buffer in the decoder, but we only care about
574 // the position in the constant stream; the total number of
575 // instructions is redundant.
576 unsigned const_idx = 0;
577 switch (pos) {
578 case 0:
579 case 1:
580 case 2:
581 case 6:
582 const_idx = 0;
583 break;
584 case 3:
585 case 4:
586 case 7:
587 case 9:
588 const_idx = 1;
589 break;
590 case 5:
591 case 0xa:
592 const_idx = 2;
593 break;
594 case 8:
595 case 0xb:
596 case 0xc:
597 const_idx = 3;
598 break;
599 case 0xd:
600 const_idx = 4;
601 break;
602 case 0xe:
603 const_idx = 5;
604 break;
605 default:
606 fprintf(fp, "# unknown pos 0x%x\n", pos);
607 break;
608 }
609
610 if (num_consts < const_idx + 2)
611 num_consts = const_idx + 2;
612
613 consts.raw[const_idx] = const0;
614 consts.raw[const_idx + 1] = const1;
615
616 /* Calculate M values from A, B and 4-bit
617 * unsigned arithmetic */
618
619 signed A1 = bits(words[2], 0, 4);
620 signed B1 = bits(words[3], 28, 32);
621 signed A2 = bits(words[1], 0, 4);
622 signed B2 = bits(words[2], 28, 32);
623
624 unsigned M1 = (A1 - B1) % 16;
625 unsigned M2 = (A2 - B2) % 16;
626
627 decode_M(&consts.mods[const_idx], M1, M2, false);
628
629 done = stop;
630 break;
631 }
632 default:
633 break;
634 }
635
636 if (done)
637 break;
638 }
639 }
640
641 *size = i + 1;
642
643 if (verbose) {
644 fprintf(fp, "# header: %012" PRIx64 "\n", header_bits);
645 }
646
647 struct bifrost_header header;
648 memcpy((char *) &header, (char *) &header_bits, sizeof(struct bifrost_header));
649 dump_header(fp, header, verbose);
650 if (header.flow_control == BIFROST_FLOW_END)
651 stopbit = true;
652
653 fprintf(fp, "{\n");
654 for (i = 0; i < num_instrs; i++) {
655 struct bifrost_regs regs, next_regs;
656 if (i + 1 == num_instrs) {
657 memcpy((char *) &next_regs, (char *) &instrs[0].reg_bits,
658 sizeof(next_regs));
659 } else {
660 memcpy((char *) &next_regs, (char *) &instrs[i + 1].reg_bits,
661 sizeof(next_regs));
662 }
663
664 memcpy((char *) ®s, (char *) &instrs[i].reg_bits, sizeof(regs));
665
666 if (verbose) {
667 fprintf(fp, "# regs: %016" PRIx64 "\n", instrs[i].reg_bits);
668 dump_regs(fp, regs, i == 0);
669 }
670
671 bi_disasm_fma(fp, instrs[i].fma_bits, ®s, &next_regs,
672 header.staging_register, offset, &consts,
673 i + 1 == num_instrs);
674
675 bi_disasm_add(fp, instrs[i].add_bits, ®s, &next_regs,
676 header.staging_register, offset, &consts,
677 i + 1 == num_instrs);
678 }
679 fprintf(fp, "}\n");
680
681 if (verbose) {
682 for (unsigned i = 0; i < num_consts; i++) {
683 fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i, consts.raw[i] & 0xffffffff);
684 fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i + 1, consts.raw[i] >> 32);
685 }
686 }
687 return stopbit;
688 }
689
/* Disassemble a Bifrost shader binary to `fp`, one clause at a time.
 *
 * fp       output stream
 * code     shader binary, read as 32-bit words
 * size     length of `code` in bytes
 * verbose  also dump raw encodings alongside the disassembly
 *
 * Disassembly stops at the first all-zero quadword, at a clause whose header
 * marks the end of the program, or when the buffer is exhausted. Trailing
 * bytes that do not form a whole 128-bit quadword are ignored.
 */
void disassemble_bifrost(FILE *fp, uint8_t *code, size_t size, bool verbose)
{
        static const uint32_t zero[4] = { 0 };

        uint32_t *words = (uint32_t *) code;
        size_t words_left = size / sizeof(uint32_t);
        // used for displaying branch targets
        unsigned offset = 0;

        /* Track the remaining length instead of comparing against an end
         * pointer: a clause always starts with a whole quadword, and an
         * inequality test stays safe when the length is not a multiple of
         * four words or a clause claims more than is left. */
        while (words_left >= 4) {
                // we don't know what the program-end bit is quite yet, so for now just
                // assume that an all-0 quadword is padding
                if (memcmp(words, zero, sizeof(zero)) == 0)
                        break;

                fprintf(fp, "clause_%u:\n", offset);

                unsigned clause_quads = 0;
                if (dump_clause(fp, words, &clause_quads, offset, verbose))
                        break;

                /* dump_clause reports its length in quadwords */
                size_t consumed = (size_t) clause_quads * 4;
                if (consumed > words_left)
                        break;

                words += consumed;
                words_left -= consumed;
                offset += clause_quads;
        }
}
711
712