• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 Connor Abbott <cwabbott0@gmail.com>
3  * Copyright (C) 2019 Lyude Paul <thatslyude@gmail.com>
4  * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  */
25 
26 #include <stdbool.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <assert.h>
30 #include <inttypes.h>
31 #include <string.h>
32 
33 #include "bifrost.h"
34 #include "disassemble.h"
35 #include "bi_print_common.h"
36 #include "util/macros.h"
37 
/* Extract the half-open bit range [lo, high) from a 32-bit word.
 *
 * Bit `lo` of `word` ends up in bit 0 of the result and bit `high` is the
 * first bit that is NOT included. `high == 32` selects everything from `lo`
 * upwards.
 */
static uint64_t bits(uint32_t word, unsigned lo, unsigned high)
{
        /* Build the mask in 64 bits: `1 << high` on a plain int overflows for
         * high == 31, and a 64-bit shift also covers high == 32 without a
         * special case. */
        const uint64_t mask = (UINT64_C(1) << high) - 1;

        return ((uint64_t) word & mask) >> lo;
}
45 
/* One dispatch cycle's worth of instruction bits.
 *
 * The encoding scatters the pieces of an instruction around the clause, so
 * the clause decoder first gathers them into this struct and only then
 * disassembles them.
 */
struct bifrost_alu_inst {
        uint32_t fma_bits; /* raw bits passed to bi_disasm_fma() */
        uint32_t add_bits; /* raw bits passed to bi_disasm_add() */
        uint64_t reg_bits; /* raw register block, copied into struct bifrost_regs */
};
54 
/* Recover the register number read through slot 0.
 *
 * With a non-zero ctrl field the number is reg0 itself when reg0 <= reg1 and
 * 63 - reg0 otherwise. With ctrl == 0, bit 0 of reg1 supplies bit 5 of the
 * register number instead.
 */
static unsigned get_reg0(struct bifrost_regs regs)
{
        if (regs.ctrl != 0) {
                if (regs.reg0 > regs.reg1)
                        return 63 - regs.reg0;

                return regs.reg0;
        }

        return ((regs.reg1 & 0x1) << 5) | regs.reg0;
}
62 
/* Recover the register number read through slot 1: reg1 itself when
 * reg0 <= reg1, otherwise 63 - reg1. */
static unsigned get_reg1(struct bifrost_regs regs)
{
        if (regs.reg0 > regs.reg1)
                return 63 - regs.reg1;

        return regs.reg1;
}
67 
68 // this represents the decoded version of the ctrl register field.
69 struct bifrost_reg_ctrl {
70         bool read_reg0;
71         bool read_reg1;
72         struct bifrost_reg_ctrl_23 slot23;
73         bool clause_start;
74 };
75 
dump_header(FILE * fp,struct bifrost_header header,bool verbose)76 static void dump_header(FILE *fp, struct bifrost_header header, bool verbose)
77 {
78         fprintf(fp, "ds(%du) ", header.dependency_slot);
79 
80         if (header.staging_barrier)
81                 fprintf(fp, "osrb ");
82 
83         fprintf(fp, "%s ", bi_flow_control_name(header.flow_control));
84 
85         if (header.suppress_inf)
86                 fprintf(fp, "inf_suppress ");
87         if (header.suppress_nan)
88                 fprintf(fp, "nan_suppress ");
89 
90         if (header.flush_to_zero == BIFROST_FTZ_DX11)
91                 fprintf(fp, "ftz_dx11 ");
92         else if (header.flush_to_zero == BIFROST_FTZ_ALWAYS)
93                 fprintf(fp, "ftz_hsa ");
94         if (header.flush_to_zero == BIFROST_FTZ_ABRUPT)
95                 fprintf(fp, "ftz_au ");
96 
97         assert(!header.zero1);
98         assert(!header.zero2);
99 
100         if (header.float_exceptions == BIFROST_EXCEPTIONS_DISABLED)
101                 fprintf(fp, "fpe_ts ");
102         else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_DIVISION)
103                 fprintf(fp, "fpe_pd ");
104         else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_SQRT)
105                 fprintf(fp, "fpe_psqr ");
106 
107         if (header.message_type)
108                 fprintf(fp, "%s ", bi_message_type_name(header.message_type));
109 
110         if (header.terminate_discarded_threads)
111                 fprintf(fp, "td ");
112 
113         if (header.next_clause_prefetch)
114                 fprintf(fp, "ncph ");
115 
116         if (header.next_message_type)
117                 fprintf(fp, "next_%s ", bi_message_type_name(header.next_message_type));
118         if (header.dependency_wait != 0) {
119                 fprintf(fp, "dwb(");
120                 bool first = true;
121                 for (unsigned i = 0; i < 8; i++) {
122                         if (header.dependency_wait & (1 << i)) {
123                                 if (!first) {
124                                         fprintf(fp, ", ");
125                                 }
126                                 fprintf(fp, "%d", i);
127                                 first = false;
128                         }
129                 }
130                 fprintf(fp, ") ");
131         }
132 
133         fprintf(fp, "\n");
134 }
135 
DecodeRegCtrl(FILE * fp,struct bifrost_regs regs,bool first)136 static struct bifrost_reg_ctrl DecodeRegCtrl(FILE *fp, struct bifrost_regs regs, bool first)
137 {
138         struct bifrost_reg_ctrl decoded = {};
139         unsigned ctrl;
140         if (regs.ctrl == 0) {
141                 ctrl = regs.reg1 >> 2;
142                 decoded.read_reg0 = !(regs.reg1 & 0x2);
143                 decoded.read_reg1 = false;
144         } else {
145                 ctrl = regs.ctrl;
146                 decoded.read_reg0 = decoded.read_reg1 = true;
147         }
148 
149         /* Modify control based on state */
150         if (first)
151                 ctrl = (ctrl & 0x7) | ((ctrl & 0x8) << 1);
152         else if (regs.reg2 == regs.reg3)
153                 ctrl += 16;
154 
155         decoded.slot23 = bifrost_reg_ctrl_lut[ctrl];
156         ASSERTED struct bifrost_reg_ctrl_23 reserved = { 0 };
157         assert(memcmp(&decoded.slot23, &reserved, sizeof(reserved)));
158 
159         return decoded;
160 }
161 
dump_regs(FILE * fp,struct bifrost_regs srcs,bool first)162 static void dump_regs(FILE *fp, struct bifrost_regs srcs, bool first)
163 {
164         struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, srcs, first);
165         fprintf(fp, "# ");
166         if (ctrl.read_reg0)
167                 fprintf(fp, "slot 0: r%d ", get_reg0(srcs));
168         if (ctrl.read_reg1)
169                 fprintf(fp, "slot 1: r%d ", get_reg1(srcs));
170 
171         const char *slot3_fma = ctrl.slot23.slot3_fma ? "FMA" : "ADD";
172 
173         if (ctrl.slot23.slot2 == BIFROST_OP_WRITE)
174                 fprintf(fp, "slot 2: r%d (write FMA) ", srcs.reg2);
175         else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_LO)
176                 fprintf(fp, "slot 2: r%d (write lo FMA) ", srcs.reg2);
177         else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_HI)
178                 fprintf(fp, "slot 2: r%d (write hi FMA) ", srcs.reg2);
179         else if (ctrl.slot23.slot2 == BIFROST_OP_READ)
180                 fprintf(fp, "slot 2: r%d (read) ", srcs.reg2);
181 
182         if (ctrl.slot23.slot3 == BIFROST_OP_WRITE)
183                 fprintf(fp, "slot 3: r%d (write %s) ", srcs.reg3, slot3_fma);
184         else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_LO)
185                 fprintf(fp, "slot 3: r%d (write lo %s) ", srcs.reg3, slot3_fma);
186         else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_HI)
187                 fprintf(fp, "slot 3: r%d (write hi %s) ", srcs.reg3, slot3_fma);
188 
189         if (srcs.fau_idx) {
190                 if (srcs.fau_idx & 0x80) {
191                         fprintf(fp, "uniform: u%d", (srcs.fau_idx & 0x7f) * 2);
192                 }
193         }
194 
195         fprintf(fp, "\n");
196 }
197 
198 static void
bi_disasm_dest_mask(FILE * fp,enum bifrost_reg_op op)199 bi_disasm_dest_mask(FILE *fp, enum bifrost_reg_op op)
200 {
201         if (op == BIFROST_OP_WRITE_LO)
202                 fprintf(fp, ".h0");
203         else if (op == BIFROST_OP_WRITE_HI)
204                 fprintf(fp, ".h1");
205 }
206 
207 void
bi_disasm_dest_fma(FILE * fp,struct bifrost_regs * next_regs,bool last)208 bi_disasm_dest_fma(FILE *fp, struct bifrost_regs *next_regs, bool last)
209 {
210     /* If this is the last instruction, next_regs points to the first reg entry. */
211     struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last);
212     if (ctrl.slot23.slot2 >= BIFROST_OP_WRITE) {
213         fprintf(fp, "r%u:t0", next_regs->reg2);
214         bi_disasm_dest_mask(fp, ctrl.slot23.slot2);
215     } else if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && ctrl.slot23.slot3_fma) {
216         fprintf(fp, "r%u:t0", next_regs->reg3);
217         bi_disasm_dest_mask(fp, ctrl.slot23.slot3);
218     } else
219         fprintf(fp, "t0");
220 }
221 
222 void
bi_disasm_dest_add(FILE * fp,struct bifrost_regs * next_regs,bool last)223 bi_disasm_dest_add(FILE *fp, struct bifrost_regs *next_regs, bool last)
224 {
225     /* If this is the last instruction, next_regs points to the first reg entry. */
226     struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last);
227 
228     if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && !ctrl.slot23.slot3_fma) {
229         fprintf(fp, "r%u:t1", next_regs->reg3);
230         bi_disasm_dest_mask(fp, ctrl.slot23.slot3);
231     } else
232         fprintf(fp, "t1");
233 }
234 
/* Print a 32-bit immediate as hex, followed by a comment showing the same
 * bit pattern reinterpreted as a float. */
static void dump_const_imm(FILE *fp, uint32_t imm)
{
        float as_float;

        /* Reinterpret the bits; this is not a numeric conversion. */
        memcpy(&as_float, &imm, sizeof(as_float));

        fprintf(fp, "0x%08x /* %f */", imm, as_float);
}
244 
245 static void
dump_pc_imm(FILE * fp,uint64_t imm,enum bi_constmod mod,bool high32)246 dump_pc_imm(FILE *fp, uint64_t imm, enum bi_constmod mod, bool high32)
247 {
248         /* 60-bit sign-extend */
249         uint64_t zx64 = (imm << 4);
250         int64_t sx64 = zx64;
251         sx64 >>= 4;
252 
253         /* 28-bit sign extend x 2 */
254         uint32_t imm32[2] = { (uint32_t) imm, (uint32_t) (imm >> 32) };
255         uint32_t zx32[2] = { imm32[0] << 4, imm32[1] << 4 };
256         int32_t sx32[2] = { zx32[0], zx32[1] };
257         sx32[0] >>= 4;
258         sx32[1] >>= 4;
259 
260         switch (mod) {
261         case BI_CONSTMOD_PC_LO:
262                 fprintf(fp, "(pc + %" PRId64 ")%s",
263                         sx64,
264                         high32 ? " >> 32" : "");
265                 break;
266         case BI_CONSTMOD_PC_HI:
267                 if (high32)
268                         fprintf(fp, "(pc + %d)", sx32[1]);
269                 else
270                         dump_const_imm(fp, imm);
271                 break;
272         case BI_CONSTMOD_PC_LO_HI:
273                 fprintf(fp, "(pc + %d)", sx32[high32]);
274                 break;
275         default:
276                 unreachable("Invalid PC modifier");
277         }
278 }
279 
/* Map the FAU-RAM selector of an embedded constant to the index of that
 * constant within the clause. The mapping is not in order: selectors 4..7
 * give constants 0..3, and selectors 2..3 give constants 4..5. Selectors 0
 * and 1 do not name an embedded constant.
 *
 * fau_value: selector in the range [0, 8)
 *
 * Returns the constant index (0..5). Invalid selectors trip an assertion.
 */

static unsigned
const_fau_to_idx(unsigned fau_value)
{
        static const unsigned map[8] = {
                ~0u, ~0u, 4, 5, 0, 1, 2, 3
        };

        /* Check the selector before it is used as an index, so a bad value
         * cannot read outside the table. */
        assert(fau_value < sizeof(map) / sizeof(map[0]));
        assert(map[fau_value] < 6);

        return map[fau_value];
}
293 
dump_fau_src(FILE * fp,struct bifrost_regs srcs,struct bi_constants * consts,bool high32)294 static void dump_fau_src(FILE *fp, struct bifrost_regs srcs, struct bi_constants *consts, bool high32)
295 {
296         if (srcs.fau_idx & 0x80) {
297                 unsigned uniform = (srcs.fau_idx & 0x7f);
298                 fprintf(fp, "u%d.w%d", uniform, high32);
299         } else if (srcs.fau_idx >= 0x20) {
300                 unsigned idx = const_fau_to_idx(srcs.fau_idx >> 4);
301                 uint64_t imm = consts->raw[idx];
302                 imm |= (srcs.fau_idx & 0xf);
303                 if (consts->mods[idx] != BI_CONSTMOD_NONE)
304                         dump_pc_imm(fp, imm, consts->mods[idx], high32);
305                 else if (high32)
306                         dump_const_imm(fp, imm >> 32);
307                 else
308                         dump_const_imm(fp, imm);
309         } else {
310                 switch (srcs.fau_idx) {
311                 case 0:
312                         fprintf(fp, "#0");
313                         break;
314                 case 1:
315                         fprintf(fp, "lane_id");
316                         break;
317                 case 2:
318                         fprintf(fp, "warp_id");
319                         break;
320                 case 3:
321                         fprintf(fp, "core_id");
322                         break;
323                 case 4:
324                         fprintf(fp, "framebuffer_size");
325                         break;
326                 case 5:
327                         fprintf(fp, "atest_datum");
328                         break;
329                 case 6:
330                         fprintf(fp, "sample");
331                         break;
332                 case 8:
333                 case 9:
334                 case 10:
335                 case 11:
336                 case 12:
337                 case 13:
338                 case 14:
339                 case 15:
340                         fprintf(fp, "blend_descriptor_%u", (unsigned) srcs.fau_idx - 8);
341                         break;
342                 default:
343                         fprintf(fp, "XXX - reserved%u", (unsigned) srcs.fau_idx);
344                         break;
345                 }
346 
347                 if (high32)
348                         fprintf(fp, ".y");
349                 else
350                         fprintf(fp, ".x");
351         }
352 }
353 
354 void
dump_src(FILE * fp,unsigned src,struct bifrost_regs srcs,struct bi_constants * consts,bool isFMA)355 dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs, struct bi_constants *consts, bool isFMA)
356 {
357         switch (src) {
358         case 0:
359                 fprintf(fp, "r%d", get_reg0(srcs));
360                 break;
361         case 1:
362                 fprintf(fp, "r%d", get_reg1(srcs));
363                 break;
364         case 2:
365                 fprintf(fp, "r%d", srcs.reg2);
366                 break;
367         case 3:
368                 if (isFMA)
369                         fprintf(fp, "#0");
370                 else
371                         fprintf(fp, "t"); // i.e. the output of FMA this cycle
372                 break;
373         case 4:
374                 dump_fau_src(fp, srcs, consts, false);
375                 break;
376         case 5:
377                 dump_fau_src(fp, srcs, consts, true);
378                 break;
379         case 6:
380                 fprintf(fp, "t0");
381                 break;
382         case 7:
383                 fprintf(fp, "t1");
384                 break;
385         }
386 }
387 
/* Tables for decoding M1, or if M1 == 7, M2 respectively.
 *
 * XXX: It's not clear whether the third entry of M2_table, corresponding to
 * (M1, M2) = (7, 2), should have PC_LO_HI in the EC1 slot, or whether it is a
 * weird hybrid mode. This needs testing, but no code should ever actually use
 * this mode.
 */
394 
395 static const enum bi_constmod M1_table[7][2] = {
396         { BI_CONSTMOD_NONE, BI_CONSTMOD_NONE },
397         { BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE },
398         { BI_CONSTMOD_PC_LO, BI_CONSTMOD_PC_LO },
399         { ~0, ~0 },
400         { BI_CONSTMOD_PC_HI, BI_CONSTMOD_NONE },
401         { BI_CONSTMOD_PC_HI, BI_CONSTMOD_PC_HI },
402         { BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE },
403 };
404 
405 static const enum bi_constmod M2_table[4][2] = {
406         { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_NONE },
407         { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI },
408         { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_LO_HI },
409         { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI },
410 };
411 
412 static void
decode_M(enum bi_constmod * mod,unsigned M1,unsigned M2,bool single)413 decode_M(enum bi_constmod *mod, unsigned M1, unsigned M2, bool single)
414 {
415         if (M1 >= 8) {
416                 mod[0] = BI_CONSTMOD_NONE;
417 
418                 if (!single)
419                         mod[1] = BI_CONSTMOD_NONE;
420 
421                 return;
422         } else if (M1 == 7) {
423                 assert(M2 < 4);
424                 memcpy(mod, M2_table[M2], sizeof(*mod) * (single ? 1 : 2));
425         } else {
426                 assert(M1 != 3);
427                 memcpy(mod, M1_table[M1], sizeof(*mod) * (single ? 1 : 2));
428         }
429 }
430 
dump_clause(FILE * fp,uint32_t * words,unsigned * size,unsigned offset,bool verbose)431 static bool dump_clause(FILE *fp, uint32_t *words, unsigned *size, unsigned offset, bool verbose)
432 {
433         // State for a decoded clause
434         struct bifrost_alu_inst instrs[8] = {};
435         struct bi_constants consts = {};
436         unsigned num_instrs = 0;
437         unsigned num_consts = 0;
438         uint64_t header_bits = 0;
439         bool stopbit = false;
440 
441         unsigned i;
442         for (i = 0; ; i++, words += 4) {
443                 if (verbose) {
444                         fprintf(fp, "# ");
445                         for (int j = 0; j < 4; j++)
446                                 fprintf(fp, "%08x ", words[3 - j]); // low bit on the right
447                         fprintf(fp, "\n");
448                 }
449                 unsigned tag = bits(words[0], 0, 8);
450 
451                 // speculatively decode some things that are common between many formats, so we can share some code
452                 struct bifrost_alu_inst main_instr = {};
453                 // 20 bits
454                 main_instr.add_bits = bits(words[2], 2, 32 - 13);
455                 // 23 bits
456                 main_instr.fma_bits = bits(words[1], 11, 32) | bits(words[2], 0, 2) << (32 - 11);
457                 // 35 bits
458                 main_instr.reg_bits = ((uint64_t) bits(words[1], 0, 11)) << 24 | (uint64_t) bits(words[0], 8, 32);
459 
460                 uint64_t const0 = bits(words[0], 8, 32) << 4 | (uint64_t) words[1] << 28 | bits(words[2], 0, 4) << 60;
461                 uint64_t const1 = bits(words[2], 4, 32) << 4 | (uint64_t) words[3] << 32;
462 
463                 /* Z-bit */
464                 bool stop = tag & 0x40;
465 
466                 if (verbose) {
467                         fprintf(fp, "# tag: 0x%02x\n", tag);
468                 }
469                 if (tag & 0x80) {
470                         /* Format 5 or 10 */
471                         unsigned idx = stop ? 5 : 2;
472                         main_instr.add_bits |= ((tag >> 3) & 0x7) << 17;
473                         instrs[idx + 1] = main_instr;
474                         instrs[idx].add_bits = bits(words[3], 0, 17) | ((tag & 0x7) << 17);
475                         instrs[idx].fma_bits |= bits(words[2], 19, 32) << 10;
476                         consts.raw[0] = bits(words[3], 17, 32) << 4;
477                 } else {
478                         bool done = false;
479                         switch ((tag >> 3) & 0x7) {
480                         case 0x0:
481                                 switch (tag & 0x7) {
482                                 case 0x3:
483                                         /* Format 1 */
484                                         main_instr.add_bits |= bits(words[3], 29, 32) << 17;
485                                         instrs[1] = main_instr;
486                                         num_instrs = 2;
487                                         done = stop;
488                                         break;
489                                 case 0x4:
490                                         /* Format 3 */
491                                         instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
492                                         instrs[2].fma_bits |= bits(words[2], 19, 32) << 10;
493                                         consts.raw[0] = const0;
494                                         decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true);
495                                         num_instrs = 3;
496                                         num_consts = 1;
497                                         done = stop;
498                                         break;
499                                 case 0x1:
500                                 case 0x5:
501                                         /* Format 4 */
502                                         instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
503                                         instrs[2].fma_bits |= bits(words[2], 19, 32) << 10;
504                                         main_instr.add_bits |= bits(words[3], 26, 29) << 17;
505                                         instrs[3] = main_instr;
506                                         if ((tag & 0x7) == 0x5) {
507                                                 num_instrs = 4;
508                                                 done = stop;
509                                         }
510                                         break;
511                                 case 0x6:
512                                         /* Format 8 */
513                                         instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
514                                         instrs[5].fma_bits |= bits(words[2], 19, 32) << 10;
515                                         consts.raw[0] = const0;
516                                         decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true);
517                                         num_instrs = 6;
518                                         num_consts = 1;
519                                         done = stop;
520                                         break;
521                                 case 0x7:
522                                         /* Format 9 */
523                                         instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
524                                         instrs[5].fma_bits |= bits(words[2], 19, 32) << 10;
525                                         main_instr.add_bits |= bits(words[3], 26, 29) << 17;
526                                         instrs[6] = main_instr;
527                                         num_instrs = 7;
528                                         done = stop;
529                                         break;
530                                 default:
531                                         unreachable("[INSTR_INVALID_ENC] Invalid tag bits");
532                                 }
533                                 break;
534                         case 0x2:
535                         case 0x3: {
536                                 /* Format 6 or 11 */
537                                 unsigned idx = ((tag >> 3) & 0x7) == 2 ? 4 : 7;
538                                 main_instr.add_bits |= (tag & 0x7) << 17;
539                                 instrs[idx] = main_instr;
540                                 consts.raw[0] |= (bits(words[2], 19, 32) | ((uint64_t) words[3] << 13)) << 19;
541                                 num_consts = 1;
542                                 num_instrs = idx + 1;
543                                 done = stop;
544                                 break;
545                         }
546                         case 0x4: {
547                                 /* Format 2 */
548                                 unsigned idx = stop ? 4 : 1;
549                                 main_instr.add_bits |= (tag & 0x7) << 17;
550                                 instrs[idx] = main_instr;
551                                 instrs[idx + 1].fma_bits |= bits(words[3], 22, 32);
552                                 instrs[idx + 1].reg_bits = bits(words[2], 19, 32) | (bits(words[3], 0, 22) << (32 - 19));
553                                 break;
554                         }
555                         case 0x1:
556                                 /* Format 0 - followed by constants */
557                                 num_instrs = 1;
558                                 done = stop;
559                                 /* fallthrough */
560                         case 0x5:
561                                 /* Format 0 - followed by instructions */
562                                 header_bits = bits(words[2], 19, 32) | ((uint64_t) words[3] << (32 - 19));
563                                 main_instr.add_bits |= (tag & 0x7) << 17;
564                                 instrs[0] = main_instr;
565                                 break;
566                         case 0x6:
567                         case 0x7: {
568                                 /* Format 12 */
569                                 unsigned pos = tag & 0xf;
570                                 // note that `pos' encodes both the total number of
571                                 // instructions and the position in the constant stream,
572                                 // presumably because decoded constants and instructions
573                                 // share a buffer in the decoder, but we only care about
574                                 // the position in the constant stream; the total number of
575                                 // instructions is redundant.
576                                 unsigned const_idx = 0;
577                                 switch (pos) {
578                                 case 0:
579                                 case 1:
580                                 case 2:
581                                 case 6:
582                                         const_idx = 0;
583                                         break;
584                                 case 3:
585                                 case 4:
586                                 case 7:
587                                 case 9:
588                                         const_idx = 1;
589                                         break;
590                                 case 5:
591                                 case 0xa:
592                                         const_idx = 2;
593                                         break;
594                                 case 8:
595                                 case 0xb:
596                                 case 0xc:
597                                         const_idx = 3;
598                                         break;
599                                 case 0xd:
600                                         const_idx = 4;
601                                         break;
602                                 case 0xe:
603                                         const_idx = 5;
604                                         break;
605                                 default:
606                                         fprintf(fp, "# unknown pos 0x%x\n", pos);
607                                         break;
608                                 }
609 
610                                 if (num_consts < const_idx + 2)
611                                         num_consts = const_idx + 2;
612 
613                                 consts.raw[const_idx] = const0;
614                                 consts.raw[const_idx + 1] = const1;
615 
616                                 /* Calculate M values from A, B and 4-bit
617                                  * unsigned arithmetic */
618 
619                                 signed A1 = bits(words[2], 0, 4);
620                                 signed B1 = bits(words[3], 28, 32);
621                                 signed A2 = bits(words[1], 0, 4);
622                                 signed B2 = bits(words[2], 28, 32);
623 
624                                 unsigned M1 = (A1 - B1) % 16;
625                                 unsigned M2 = (A2 - B2) % 16;
626 
627                                 decode_M(&consts.mods[const_idx], M1, M2, false);
628 
629                                 done = stop;
630                                 break;
631                         }
632                         default:
633                                 break;
634                         }
635 
636                         if (done)
637                                 break;
638                 }
639         }
640 
641         *size = i + 1;
642 
643         if (verbose) {
644                 fprintf(fp, "# header: %012" PRIx64 "\n", header_bits);
645         }
646 
647         struct bifrost_header header;
648         memcpy((char *) &header, (char *) &header_bits, sizeof(struct bifrost_header));
649         dump_header(fp, header, verbose);
650         if (header.flow_control == BIFROST_FLOW_END)
651                 stopbit = true;
652 
653         fprintf(fp, "{\n");
654         for (i = 0; i < num_instrs; i++) {
655                 struct bifrost_regs regs, next_regs;
656                 if (i + 1 == num_instrs) {
657                         memcpy((char *) &next_regs, (char *) &instrs[0].reg_bits,
658                                sizeof(next_regs));
659                 } else {
660                         memcpy((char *) &next_regs, (char *) &instrs[i + 1].reg_bits,
661                                sizeof(next_regs));
662                 }
663 
664                 memcpy((char *) &regs, (char *) &instrs[i].reg_bits, sizeof(regs));
665 
666                 if (verbose) {
667                         fprintf(fp, "# regs: %016" PRIx64 "\n", instrs[i].reg_bits);
668                         dump_regs(fp, regs, i == 0);
669                 }
670 
671                 bi_disasm_fma(fp, instrs[i].fma_bits, &regs, &next_regs,
672                                 header.staging_register, offset, &consts,
673                                 i + 1 == num_instrs);
674 
675                 bi_disasm_add(fp, instrs[i].add_bits, &regs, &next_regs,
676                                 header.staging_register, offset, &consts,
677                                 i + 1 == num_instrs);
678         }
679         fprintf(fp, "}\n");
680 
681         if (verbose) {
682                 for (unsigned i = 0; i < num_consts; i++) {
683                         fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i, consts.raw[i] & 0xffffffff);
684                         fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i + 1, consts.raw[i] >> 32);
685                 }
686         }
687         return stopbit;
688 }
689 
/* Disassemble a buffer of Bifrost machine code, clause by clause.
 *
 * fp:      stream to write the disassembly to
 * code:    start of the machine code
 * size:    length of the buffer in bytes; a trailing partial quadword is
 *          ignored
 * verbose: forwarded to dump_clause()
 *
 * Stops at the end of the buffer, at the first all-zero quadword, or after a
 * clause for which dump_clause() returns true.
 */
void disassemble_bifrost(FILE *fp, uint8_t *code, size_t size, bool verbose)
{
        static const uint32_t zero[4] = { 0 };

        uint32_t *words = (uint32_t *) code;
        uint32_t *words_end = words + (size / 4);
        // used for displaying branch targets
        unsigned offset = 0;

        /* Require a whole quadword: comparing with != would never terminate
         * if the end is not hit exactly, and the padding check below reads
         * four words. */
        while (words_end - words >= 4) {
                // we don't know what the program-end bit is quite yet, so for now just
                // assume that an all-0 quadword is padding
                if (memcmp(words, zero, sizeof(zero)) == 0)
                        break;

                fprintf(fp, "clause_%u:\n", offset);

                unsigned clause_size;
                if (dump_clause(fp, words, &clause_size, offset, verbose))
                        break;

                /* Never advance beyond the end of the buffer. */
                if (clause_size > (size_t) (words_end - words) / 4)
                        break;

                words += clause_size * 4;
                offset += clause_size;
        }
}
711 
712