• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 Connor Abbott <cwabbott0@gmail.com>
3  * Copyright (C) 2019 Lyude Paul <thatslyude@gmail.com>
4  * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  */
25 
26 #include <assert.h>
27 #include <inttypes.h>
28 #include <stdbool.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <string.h>
32 
33 #include "util/compiler.h"
34 #include "util/macros.h"
35 #include "bi_print_common.h"
36 #include "bifrost.h"
37 #include "disassemble.h"
38 
/* Extract bits [lo, high) of word: bit `lo` becomes bit 0 of the result and
 * bit `high` is the first bit left out. Requires lo < 32 and high <= 32. */
static uint64_t
bits(uint32_t word, unsigned lo, unsigned high)
{
   /* Shifting by the full width of the type is undefined, so when nothing
    * has to be masked off at the top just drop the low bits. */
   if (high == 32)
      return word >> lo;

   /* Build the mask in unsigned arithmetic: with a signed 1, high == 31
    * would shift into the sign bit, which is undefined behaviour. */
   return (word & ((UINT32_C(1) << high) - 1)) >> lo;
}
47 
/* One FMA/ADD instruction pair as dispatched in a single cycle, together
 * with the register block that goes with it. Inside a clause these fields
 * are scattered across the quadwords in format-dependent ways, so the
 * decoder gathers them into this flat struct before printing. */
struct bifrost_alu_inst {
   /* Raw encoding handed to the FMA printer. */
   uint32_t fma_bits;
   /* Raw encoding handed to the ADD printer. */
   uint32_t add_bits;
   /* Raw register block; later copied byte-wise into a struct bifrost_regs. */
   uint64_t reg_bits;
};
56 
57 static unsigned
get_reg0(struct bifrost_regs regs)58 get_reg0(struct bifrost_regs regs)
59 {
60    if (regs.ctrl == 0)
61       return regs.reg0 | ((regs.reg1 & 0x1) << 5);
62 
63    return regs.reg0 <= regs.reg1 ? regs.reg0 : 63 - regs.reg0;
64 }
65 
66 static unsigned
get_reg1(struct bifrost_regs regs)67 get_reg1(struct bifrost_regs regs)
68 {
69    return regs.reg0 <= regs.reg1 ? regs.reg1 : 63 - regs.reg1;
70 }
71 
72 // this represents the decoded version of the ctrl register field.
73 struct bifrost_reg_ctrl {
74    bool read_reg0;
75    bool read_reg1;
76    struct bifrost_reg_ctrl_23 slot23;
77 };
78 
79 static void
dump_header(FILE * fp,struct bifrost_header header,bool verbose)80 dump_header(FILE *fp, struct bifrost_header header, bool verbose)
81 {
82    fprintf(fp, "ds(%u) ", header.dependency_slot);
83 
84    if (header.staging_barrier)
85       fprintf(fp, "osrb ");
86 
87    fprintf(fp, "%s ", bi_flow_control_name(header.flow_control));
88 
89    if (header.suppress_inf)
90       fprintf(fp, "inf_suppress ");
91    if (header.suppress_nan)
92       fprintf(fp, "nan_suppress ");
93 
94    if (header.flush_to_zero == BIFROST_FTZ_DX11)
95       fprintf(fp, "ftz_dx11 ");
96    else if (header.flush_to_zero == BIFROST_FTZ_ALWAYS)
97       fprintf(fp, "ftz_hsa ");
98    if (header.flush_to_zero == BIFROST_FTZ_ABRUPT)
99       fprintf(fp, "ftz_au ");
100 
101    assert(!header.zero1);
102    assert(!header.zero2);
103 
104    if (header.float_exceptions == BIFROST_EXCEPTIONS_DISABLED)
105       fprintf(fp, "fpe_ts ");
106    else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_DIVISION)
107       fprintf(fp, "fpe_pd ");
108    else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_SQRT)
109       fprintf(fp, "fpe_psqr ");
110 
111    if (header.message_type)
112       fprintf(fp, "%s ", bi_message_type_name(header.message_type));
113 
114    if (header.terminate_discarded_threads)
115       fprintf(fp, "td ");
116 
117    if (header.next_clause_prefetch)
118       fprintf(fp, "ncph ");
119 
120    if (header.next_message_type)
121       fprintf(fp, "next_%s ", bi_message_type_name(header.next_message_type));
122    if (header.dependency_wait != 0) {
123       fprintf(fp, "dwb(");
124       bool first = true;
125       for (unsigned i = 0; i < 8; i++) {
126          if (header.dependency_wait & (1 << i)) {
127             if (!first) {
128                fprintf(fp, ", ");
129             }
130             fprintf(fp, "%u", i);
131             first = false;
132          }
133       }
134       fprintf(fp, ") ");
135    }
136 
137    fprintf(fp, "\n");
138 }
139 
140 static struct bifrost_reg_ctrl
DecodeRegCtrl(FILE * fp,struct bifrost_regs regs,bool first)141 DecodeRegCtrl(FILE *fp, struct bifrost_regs regs, bool first)
142 {
143    struct bifrost_reg_ctrl decoded = {};
144    unsigned ctrl;
145    if (regs.ctrl == 0) {
146       ctrl = regs.reg1 >> 2;
147       decoded.read_reg0 = !(regs.reg1 & 0x2);
148       decoded.read_reg1 = false;
149    } else {
150       ctrl = regs.ctrl;
151       decoded.read_reg0 = decoded.read_reg1 = true;
152    }
153 
154    /* Modify control based on state */
155    if (first)
156       ctrl = (ctrl & 0x7) | ((ctrl & 0x8) << 1);
157    else if (regs.reg2 == regs.reg3)
158       ctrl += 16;
159 
160    decoded.slot23 = bifrost_reg_ctrl_lut[ctrl];
161    ASSERTED struct bifrost_reg_ctrl_23 reserved = {0};
162    assert(memcmp(&decoded.slot23, &reserved, sizeof(reserved)));
163 
164    return decoded;
165 }
166 
167 static void
dump_regs(FILE * fp,struct bifrost_regs srcs,bool first)168 dump_regs(FILE *fp, struct bifrost_regs srcs, bool first)
169 {
170    struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, srcs, first);
171    fprintf(fp, "    # ");
172    if (ctrl.read_reg0)
173       fprintf(fp, "slot 0: r%u ", get_reg0(srcs));
174    if (ctrl.read_reg1)
175       fprintf(fp, "slot 1: r%u ", get_reg1(srcs));
176 
177    const char *slot3_fma = ctrl.slot23.slot3_fma ? "FMA" : "ADD";
178 
179    if (ctrl.slot23.slot2 == BIFROST_OP_WRITE)
180       fprintf(fp, "slot 2: r%u (write FMA) ", srcs.reg2);
181    else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_LO)
182       fprintf(fp, "slot 2: r%u (write lo FMA) ", srcs.reg2);
183    else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_HI)
184       fprintf(fp, "slot 2: r%u (write hi FMA) ", srcs.reg2);
185    else if (ctrl.slot23.slot2 == BIFROST_OP_READ)
186       fprintf(fp, "slot 2: r%u (read) ", srcs.reg2);
187 
188    if (ctrl.slot23.slot3 == BIFROST_OP_WRITE)
189       fprintf(fp, "slot 3: r%u (write %s) ", srcs.reg3, slot3_fma);
190    else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_LO)
191       fprintf(fp, "slot 3: r%u (write lo %s) ", srcs.reg3, slot3_fma);
192    else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_HI)
193       fprintf(fp, "slot 3: r%u (write hi %s) ", srcs.reg3, slot3_fma);
194 
195    if (srcs.fau_idx)
196       fprintf(fp, "fau %X ", srcs.fau_idx);
197 
198    fprintf(fp, "\n");
199 }
200 
201 static void
bi_disasm_dest_mask(FILE * fp,enum bifrost_reg_op op)202 bi_disasm_dest_mask(FILE *fp, enum bifrost_reg_op op)
203 {
204    if (op == BIFROST_OP_WRITE_LO)
205       fprintf(fp, ".h0");
206    else if (op == BIFROST_OP_WRITE_HI)
207       fprintf(fp, ".h1");
208 }
209 
210 void
bi_disasm_dest_fma(FILE * fp,struct bifrost_regs * next_regs,bool last)211 bi_disasm_dest_fma(FILE *fp, struct bifrost_regs *next_regs, bool last)
212 {
213    /* If this is the last instruction, next_regs points to the first reg entry. */
214    struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last);
215    if (ctrl.slot23.slot2 >= BIFROST_OP_WRITE) {
216       fprintf(fp, "r%u:t0", next_regs->reg2);
217       bi_disasm_dest_mask(fp, ctrl.slot23.slot2);
218    } else if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && ctrl.slot23.slot3_fma) {
219       fprintf(fp, "r%u:t0", next_regs->reg3);
220       bi_disasm_dest_mask(fp, ctrl.slot23.slot3);
221    } else
222       fprintf(fp, "t0");
223 }
224 
225 void
bi_disasm_dest_add(FILE * fp,struct bifrost_regs * next_regs,bool last)226 bi_disasm_dest_add(FILE *fp, struct bifrost_regs *next_regs, bool last)
227 {
228    /* If this is the last instruction, next_regs points to the first reg entry. */
229    struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last);
230 
231    if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && !ctrl.slot23.slot3_fma) {
232       fprintf(fp, "r%u:t1", next_regs->reg3);
233       bi_disasm_dest_mask(fp, ctrl.slot23.slot3);
234    } else
235       fprintf(fp, "t1");
236 }
237 
/* Print a 32-bit immediate as zero-padded hex, followed by a comment showing
 * the same bit pattern reinterpreted as a float. */
static void
dump_const_imm(FILE *fp, uint32_t imm)
{
   /* Reinterpret the bits through a union rather than a pointer cast. */
   union {
      float f;
      uint32_t i;
   } fi;
   fi.i = imm;

   /* PRIx32 is the conversion that matches uint32_t on every platform. */
   fprintf(fp, "0x%08" PRIx32 " /* %f */", imm, fi.f);
}
248 
249 static void
dump_pc_imm(FILE * fp,uint64_t imm,unsigned branch_offset,enum bi_constmod mod,bool high32)250 dump_pc_imm(FILE *fp, uint64_t imm, unsigned branch_offset,
251             enum bi_constmod mod, bool high32)
252 {
253    if (mod == BI_CONSTMOD_PC_HI && !high32) {
254       dump_const_imm(fp, imm);
255       return;
256    }
257 
258    /* 60-bit sign-extend */
259    uint64_t zx64 = (imm << 4);
260    int64_t sx64 = zx64;
261    sx64 >>= 4;
262 
263    /* 28-bit sign extend x 2 */
264    uint32_t imm32[2] = {(uint32_t)imm, (uint32_t)(imm >> 32)};
265    uint32_t zx32[2] = {imm32[0] << 4, imm32[1] << 4};
266    int32_t sx32[2] = {zx32[0], zx32[1]};
267    sx32[0] >>= 4;
268    sx32[1] >>= 4;
269 
270    int64_t offs = 0;
271 
272    switch (mod) {
273    case BI_CONSTMOD_PC_LO:
274       offs = sx64;
275       break;
276    case BI_CONSTMOD_PC_HI:
277       offs = sx32[1];
278       break;
279    case BI_CONSTMOD_PC_LO_HI:
280       offs = sx32[high32];
281       break;
282    default:
283       unreachable("Invalid PC modifier");
284    }
285 
286    assert((offs & 15) == 0);
287    fprintf(fp, "clause_%" PRId64, branch_offset + (offs / 16));
288 
289    if (mod == BI_CONSTMOD_PC_LO && high32)
290       fprintf(fp, " >> 32");
291 
292    /* While technically in spec, referencing the current clause as (pc +
293     * 0) likely indicates an unintended infinite loop  */
294    if (offs == 0)
295       fprintf(fp, " /* XXX: likely an infinite loop */");
296 }
297 
/* Convert an index to an embedded constant in FAU-RAM to the index of the
 * embedded constant. No, it's not in order. Yes, really.
 *
 * fau_value must be in the range 2..7; the first two table entries are
 * invalid and trip the second assertion. */
static unsigned
const_fau_to_idx(unsigned fau_value)
{
   static const unsigned map[8] = {~0u, ~0u, 4, 5, 0, 1, 2, 3};

   /* Validate the index before it is used to read the table. */
   assert(fau_value < sizeof(map) / sizeof(map[0]));
   assert(map[fau_value] < 6);
   return map[fau_value];
}
309 
310 static void
dump_fau_src(FILE * fp,struct bifrost_regs srcs,unsigned branch_offset,struct bi_constants * consts,bool high32)311 dump_fau_src(FILE *fp, struct bifrost_regs srcs, unsigned branch_offset,
312              struct bi_constants *consts, bool high32)
313 {
314    if (srcs.fau_idx & 0x80) {
315       unsigned uniform = (srcs.fau_idx & 0x7f);
316       fprintf(fp, "u%u.w%u", uniform, high32);
317    } else if (srcs.fau_idx >= 0x20) {
318       unsigned idx = const_fau_to_idx(srcs.fau_idx >> 4);
319       uint64_t imm = consts->raw[idx];
320       imm |= (srcs.fau_idx & 0xf);
321       if (consts->mods[idx] != BI_CONSTMOD_NONE)
322          dump_pc_imm(fp, imm, branch_offset, consts->mods[idx], high32);
323       else if (high32)
324          dump_const_imm(fp, imm >> 32);
325       else
326          dump_const_imm(fp, imm);
327    } else {
328       switch (srcs.fau_idx) {
329       case 0:
330          fprintf(fp, "#0");
331          break;
332       case 1:
333          fprintf(fp, "lane_id");
334          break;
335       case 2:
336          fprintf(fp, "warp_id");
337          break;
338       case 3:
339          fprintf(fp, "core_id");
340          break;
341       case 4:
342          fprintf(fp, "framebuffer_size");
343          break;
344       case 5:
345          fprintf(fp, "atest_datum");
346          break;
347       case 6:
348          fprintf(fp, "sample");
349          break;
350       case 8:
351       case 9:
352       case 10:
353       case 11:
354       case 12:
355       case 13:
356       case 14:
357       case 15:
358          fprintf(fp, "blend_descriptor_%u", (unsigned)srcs.fau_idx - 8);
359          break;
360       default:
361          fprintf(fp, "XXX - reserved%u", (unsigned)srcs.fau_idx);
362          break;
363       }
364 
365       if (high32)
366          fprintf(fp, ".y");
367       else
368          fprintf(fp, ".x");
369    }
370 }
371 
372 void
dump_src(FILE * fp,unsigned src,struct bifrost_regs srcs,unsigned branch_offset,struct bi_constants * consts,bool isFMA)373 dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs,
374          unsigned branch_offset, struct bi_constants *consts, bool isFMA)
375 {
376    switch (src) {
377    case 0:
378       fprintf(fp, "r%u", get_reg0(srcs));
379       break;
380    case 1:
381       fprintf(fp, "r%u", get_reg1(srcs));
382       break;
383    case 2:
384       fprintf(fp, "r%u", srcs.reg2);
385       break;
386    case 3:
387       if (isFMA)
388          fprintf(fp, "#0");
389       else
390          fprintf(fp, "t"); // i.e. the output of FMA this cycle
391       break;
392    case 4:
393       dump_fau_src(fp, srcs, branch_offset, consts, false);
394       break;
395    case 5:
396       dump_fau_src(fp, srcs, branch_offset, consts, true);
397       break;
398    case 6:
399       fprintf(fp, "t0");
400       break;
401    case 7:
402       fprintf(fp, "t1");
403       break;
404    }
405 }
406 
/* Tables for decoding M1, or if M1 == 7, M2 respectively.
 *
 * XXX: It's not clear if the third entry of M2_table corresponding to (7, 2)
 * should have PC_LO_HI in the EC1 slot, or it's a weird hybrid mode? I would
 * say this needs testing but no code should ever actually use this mode.
 */
413 
414 static const enum bi_constmod M1_table[7][2] = {
415    {BI_CONSTMOD_NONE, BI_CONSTMOD_NONE},
416    {BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE},
417    {BI_CONSTMOD_PC_LO, BI_CONSTMOD_PC_LO},
418    {~0, ~0},
419    {BI_CONSTMOD_PC_HI, BI_CONSTMOD_NONE},
420    {BI_CONSTMOD_PC_HI, BI_CONSTMOD_PC_HI},
421    {BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE},
422 };
423 
424 static const enum bi_constmod M2_table[4][2] = {
425    {BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_NONE},
426    {BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI},
427    {BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_LO_HI},
428    {BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI},
429 };
430 
431 static void
decode_M(enum bi_constmod * mod,unsigned M1,unsigned M2,bool single)432 decode_M(enum bi_constmod *mod, unsigned M1, unsigned M2, bool single)
433 {
434    if (M1 >= 8) {
435       mod[0] = BI_CONSTMOD_NONE;
436 
437       if (!single)
438          mod[1] = BI_CONSTMOD_NONE;
439 
440       return;
441    } else if (M1 == 7) {
442       assert(M2 < 4);
443       memcpy(mod, M2_table[M2], sizeof(*mod) * (single ? 1 : 2));
444    } else {
445       assert(M1 != 3);
446       memcpy(mod, M1_table[M1], sizeof(*mod) * (single ? 1 : 2));
447    }
448 }
449 
450 static void
dump_clause(FILE * fp,uint32_t * words,unsigned * size,unsigned offset,bool verbose)451 dump_clause(FILE *fp, uint32_t *words, unsigned *size, unsigned offset,
452             bool verbose)
453 {
454    // State for a decoded clause
455    struct bifrost_alu_inst instrs[8] = {};
456    struct bi_constants consts = {};
457    unsigned num_instrs = 0;
458    unsigned num_consts = 0;
459    uint64_t header_bits = 0;
460 
461    unsigned i;
462    for (i = 0;; i++, words += 4) {
463       if (verbose) {
464          fprintf(fp, "# ");
465          for (int j = 0; j < 4; j++)
466             fprintf(fp, "%08x ", words[3 - j]); // low bit on the right
467          fprintf(fp, "\n");
468       }
469       unsigned tag = bits(words[0], 0, 8);
470 
471       // speculatively decode some things that are common between many formats,
472       // so we can share some code
473       struct bifrost_alu_inst main_instr = {};
474       // 20 bits
475       main_instr.add_bits = bits(words[2], 2, 32 - 13);
476       // 23 bits
477       main_instr.fma_bits = bits(words[1], 11, 32) | bits(words[2], 0, 2)
478                                                         << (32 - 11);
479       // 35 bits
480       main_instr.reg_bits = ((uint64_t)bits(words[1], 0, 11)) << 24 |
481                             (uint64_t)bits(words[0], 8, 32);
482 
483       uint64_t const0 = bits(words[0], 8, 32) << 4 | (uint64_t)words[1] << 28 |
484                         bits(words[2], 0, 4) << 60;
485       uint64_t const1 = bits(words[2], 4, 32) << 4 | (uint64_t)words[3] << 32;
486 
487       /* Z-bit */
488       bool stop = tag & 0x40;
489 
490       if (verbose) {
491          fprintf(fp, "# tag: 0x%02x\n", tag);
492       }
493       if (tag & 0x80) {
494          /* Format 5 or 10 */
495          unsigned idx = stop ? 5 : 2;
496          main_instr.add_bits |= ((tag >> 3) & 0x7) << 17;
497          instrs[idx + 1] = main_instr;
498          instrs[idx].add_bits = bits(words[3], 0, 17) | ((tag & 0x7) << 17);
499          instrs[idx].fma_bits |= bits(words[2], 19, 32) << 10;
500          consts.raw[0] = bits(words[3], 17, 32) << 4;
501       } else {
502          bool done = false;
503          switch ((tag >> 3) & 0x7) {
504          case 0x0:
505             switch (tag & 0x7) {
506             case 0x3:
507                /* Format 1 */
508                main_instr.add_bits |= bits(words[3], 29, 32) << 17;
509                instrs[1] = main_instr;
510                num_instrs = 2;
511                done = stop;
512                break;
513             case 0x4:
514                /* Format 3 */
515                instrs[2].add_bits =
516                   bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
517                instrs[2].fma_bits |= bits(words[2], 19, 32) << 10;
518                consts.raw[0] = const0;
519                decode_M(&consts.mods[0], bits(words[2], 4, 8),
520                         bits(words[2], 8, 12), true);
521                num_instrs = 3;
522                num_consts = 1;
523                done = stop;
524                break;
525             case 0x1:
526             case 0x5:
527                /* Format 4 */
528                instrs[2].add_bits =
529                   bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
530                instrs[2].fma_bits |= bits(words[2], 19, 32) << 10;
531                main_instr.add_bits |= bits(words[3], 26, 29) << 17;
532                instrs[3] = main_instr;
533                if ((tag & 0x7) == 0x5) {
534                   num_instrs = 4;
535                   done = stop;
536                }
537                break;
538             case 0x6:
539                /* Format 8 */
540                instrs[5].add_bits =
541                   bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
542                instrs[5].fma_bits |= bits(words[2], 19, 32) << 10;
543                consts.raw[0] = const0;
544                decode_M(&consts.mods[0], bits(words[2], 4, 8),
545                         bits(words[2], 8, 12), true);
546                num_instrs = 6;
547                num_consts = 1;
548                done = stop;
549                break;
550             case 0x7:
551                /* Format 9 */
552                instrs[5].add_bits =
553                   bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
554                instrs[5].fma_bits |= bits(words[2], 19, 32) << 10;
555                main_instr.add_bits |= bits(words[3], 26, 29) << 17;
556                instrs[6] = main_instr;
557                num_instrs = 7;
558                done = stop;
559                break;
560             default:
561                unreachable("[INSTR_INVALID_ENC] Invalid tag bits");
562             }
563             break;
564          case 0x2:
565          case 0x3: {
566             /* Format 6 or 11 */
567             unsigned idx = ((tag >> 3) & 0x7) == 2 ? 4 : 7;
568             main_instr.add_bits |= (tag & 0x7) << 17;
569             instrs[idx] = main_instr;
570             consts.raw[0] |=
571                (bits(words[2], 19, 32) | ((uint64_t)words[3] << 13)) << 19;
572             num_consts = 1;
573             num_instrs = idx + 1;
574             done = stop;
575             break;
576          }
577          case 0x4: {
578             /* Format 2 */
579             unsigned idx = stop ? 4 : 1;
580             main_instr.add_bits |= (tag & 0x7) << 17;
581             instrs[idx] = main_instr;
582             instrs[idx + 1].fma_bits |= bits(words[3], 22, 32);
583             instrs[idx + 1].reg_bits =
584                bits(words[2], 19, 32) | (bits(words[3], 0, 22) << (32 - 19));
585             break;
586          }
587          case 0x1:
588             /* Format 0 - followed by constants */
589             num_instrs = 1;
590             done = stop;
591             FALLTHROUGH;
592          case 0x5:
593             /* Format 0 - followed by instructions */
594             header_bits =
595                bits(words[2], 19, 32) | ((uint64_t)words[3] << (32 - 19));
596             main_instr.add_bits |= (tag & 0x7) << 17;
597             instrs[0] = main_instr;
598             break;
599          case 0x6:
600          case 0x7: {
601             /* Format 12 */
602             unsigned pos = tag & 0xf;
603 
604             struct {
605                unsigned const_idx;
606                unsigned nr_tuples;
607             } pos_table[0x10] = {{0, 1}, {0, 2}, {0, 4}, {1, 3},
608                                  {1, 5}, {2, 4}, {0, 7}, {1, 6},
609                                  {3, 5}, {1, 8}, {2, 7}, {3, 6},
610                                  {3, 8}, {4, 7}, {5, 6}, {~0, ~0}};
611 
612             ASSERTED bool valid_count = pos_table[pos].nr_tuples == num_instrs;
613             assert(valid_count && "INSTR_INVALID_ENC");
614 
615             unsigned const_idx = pos_table[pos].const_idx;
616 
617             if (num_consts < const_idx + 2)
618                num_consts = const_idx + 2;
619 
620             consts.raw[const_idx] = const0;
621             consts.raw[const_idx + 1] = const1;
622 
623             /* Calculate M values from A, B and 4-bit
624              * unsigned arithmetic. Mathematically it
625              * should be (A - B) % 16 but we use this
626              * alternate form to avoid sign issues */
627 
628             unsigned A1 = bits(words[2], 0, 4);
629             unsigned B1 = bits(words[3], 28, 32);
630             unsigned A2 = bits(words[1], 0, 4);
631             unsigned B2 = bits(words[2], 28, 32);
632 
633             unsigned M1 = (16 + A1 - B1) & 0xF;
634             unsigned M2 = (16 + A2 - B2) & 0xF;
635 
636             decode_M(&consts.mods[const_idx], M1, M2, false);
637 
638             done = stop;
639             break;
640          }
641          default:
642             break;
643          }
644 
645          if (done)
646             break;
647       }
648    }
649 
650    *size = i + 1;
651 
652    if (verbose) {
653       fprintf(fp, "# header: %012" PRIx64 "\n", header_bits);
654    }
655 
656    struct bifrost_header header;
657    memcpy((char *)&header, (char *)&header_bits, sizeof(struct bifrost_header));
658    dump_header(fp, header, verbose);
659 
660    fprintf(fp, "{\n");
661    for (i = 0; i < num_instrs; i++) {
662       struct bifrost_regs regs, next_regs;
663       if (i + 1 == num_instrs) {
664          memcpy((char *)&next_regs, (char *)&instrs[0].reg_bits,
665                 sizeof(next_regs));
666       } else {
667          memcpy((char *)&next_regs, (char *)&instrs[i + 1].reg_bits,
668                 sizeof(next_regs));
669       }
670 
671       memcpy((char *)&regs, (char *)&instrs[i].reg_bits, sizeof(regs));
672 
673       if (verbose) {
674          fprintf(fp, "    # regs: %016" PRIx64 "\n", instrs[i].reg_bits);
675          dump_regs(fp, regs, i == 0);
676       }
677 
678       bi_disasm_fma(fp, instrs[i].fma_bits, &regs, &next_regs,
679                     header.staging_register, offset, &consts,
680                     i + 1 == num_instrs);
681 
682       bi_disasm_add(fp, instrs[i].add_bits, &regs, &next_regs,
683                     header.staging_register, offset, &consts,
684                     i + 1 == num_instrs);
685    }
686    fprintf(fp, "}\n");
687 
688    if (verbose) {
689       for (unsigned i = 0; i < num_consts; i++) {
690          fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i,
691                  consts.raw[i] & 0xffffffff);
692          fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i + 1,
693                  consts.raw[i] >> 32);
694       }
695    }
696 
697    fprintf(fp, "\n");
698    return;
699 }
700 
/* Disassemble a Bifrost shader binary to fp, one clause at a time.
 *
 * code     start of the binary, read as 32-bit words
 * size     length of the binary in bytes
 * verbose  also print the raw encoding alongside the disassembly
 *
 * Disassembly stops at the end of the buffer or at the first quadword whose
 * first word is zero, whichever comes first. */
void
disassemble_bifrost(FILE *fp, uint8_t *code, size_t size, bool verbose)
{
   uint32_t *words = (uint32_t *)code;
   uint32_t *words_end = words + (size / 4);
   // used for displaying branch targets
   unsigned offset = 0;

   /* A clause is made of whole quadwords (four words), so only start one
    * while a full quadword is still available. */
   while (words_end - words >= 4) {
      /* Shaders have zero bytes at the end for padding; stop
       * disassembling when we hit them. */
      if (*words == 0)
         break;

      fprintf(fp, "clause_%u:\n", offset);

      unsigned clause_quadwords;
      dump_clause(fp, words, &clause_quadwords, offset, verbose);

      /* Never step past the end of the buffer, even if the clause claims
       * to be longer than what is left. */
      if (clause_quadwords > (size_t)(words_end - words) / 4)
         break;

      words += clause_quadwords * 4;
      offset += clause_quadwords;
   }
}
723