1 /*
2 * Copyright (c) 2018 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
/*
 * Decoder for "new" GL_OES_get_program_binary format.
 *
 * Overall structure is:
 *
 *   - header at top, which contains, amongst other things, the offsets
 *     of the per shader stage sections.
 *   - per shader stage section (shader_info) starts with a header,
 *     followed by a variable-length list of descriptors.  Each
 *     descriptor has a type/count/size plus the offset, from the start
 *     of the shader_info section, at which the data is found.
 */
36
37 #include <assert.h>
38 #include <ctype.h>
39 #include <fcntl.h>
40 #include <stddef.h>
41 #include <stdint.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46 #include <sys/stat.h>
47 #include <sys/types.h>
48
49 #include "disasm.h"
50 #include "io.h"
51 #include "redump.h"
52 #include "util.h"
53
/* Path of the input file named on the command line (assigned in main()). */
const char *infile;

/* Command line switches; static storage, so all of them start out as zero: */
static int dump_full;    /* --full */
static int dump_offsets; /* --dump-offsets */
static int shaderdb;     /* --shaderdb: output shaderdb style traces to stderr */

/* GPU id handed to the disassembler; replaced when an RD_GPU_ID section is
 * read from the input.
 */
static int gpu_id = 320;
59
/* Decoder context that is threaded through all of the decode_*() helpers. */
struct state {
   char *buf; /* start of the program binary being decoded */
   int sz;    /* size, in bytes, recorded for buf */
   int lvl;   /* current nesting level, passed to tab() for indentation */

   /* Start of the shader_info section currently being decoded.  Some
    * offsets are relative to this, rather than to the start of the buffer.
    */
   void *shader;

   /* size of each entry within a shader_descriptor_blk: */
   int desc_size;

   const char *shader_type; /* stage label, ie. "FRAG", "VERT" or "BVERT" */
   int full_regs;           /* taken from the most recent shader_config */
   int half_regs;           /* taken from the most recent shader_config */
};
77
/* The structs below are overlaid directly on the file contents, so the
 * compiler must not insert any padding:
 */
#define PACKED __attribute__((__packed__))

/* NOTE: every macro below expects a 'struct state *state' to be in scope at
 * the point of use.
 */

/* with --dump-offsets, print the offset of 'field' from the buffer start */
#define OFF(field) \
   do { \
      if (dump_offsets) \
         printf("%08x: ", (uint32_t)((char *)&(field) - state->buf)); \
   } while (0)

/* decode field as hex */
#define X(s, field) \
   do { \
      OFF((s)->field); \
      printf("%s%12s:\t0x%x\n", tab(state->lvl), #field, (s)->field); \
   } while (0)

/* decode field as digit */
#define D(s, field) \
   do { \
      OFF((s)->field); \
      printf("%s%12s:\t%u\n", tab(state->lvl), #field, (s)->field); \
   } while (0)

/* decode field as float/hex */
#define F(s, field) \
   do { \
      OFF((s)->field); \
      printf("%s%12s:\t%f (0x%0x)\n", tab(state->lvl), #field, \
             uif((s)->field), (s)->field); \
   } while (0)

/* decode field as register: (type is 'r' or 'c').  The low two bits pick
 * the component, the remaining bits are the register number.
 */
#define R(s, field, type) \
   do { \
      OFF((s)->field); \
      printf("%s%12s:\t%c%u.%c\n", tab(state->lvl), #field, type, \
             ((s)->field >> 2), "xyzw"[(s)->field & 0x3]); \
   } while (0)

/* decode inline string (presumably null terminated?) */
#define S(s, field) \
   do { \
      OFF((s)->field); \
      printf("%s%12s:\t%s\n", tab(state->lvl), #field, (s)->field); \
   } while (0)

/* decode string-table string (not implemented yet) */
#define T(s, field) TODO

/* decode field as unknown: dumps member unk_<start>_<end>, which spans the
 * dwords at hex offsets <start> through <end> inclusive.
 */
#define U(s, start, end) \
   dump_unknown(state, (s)->unk_##start##_##end, 0x##start, \
                (4 + 0x##end - 0x##start) / 4)

/* decode field as offset to other section: print the offset, then decode
 * the decode_<type>() section found there one indentation level deeper.
 */
#define O(s, field, type) \
   do { \
      X(s, field); \
      assert((s)->field < state->sz); \
      void *_target = &state->buf[(s)->field]; \
      state->lvl++; \
      decode_##type(state, _target); \
      state->lvl--; \
   } while (0)
141
/* decode_header() reaches decode_shader_info() through the O() macro before
 * the definition further down, so declare it (and its argument type) here.
 */
struct shader_info;
static void
decode_shader_info(struct state *state, struct shader_info *info);
144
145 static void
dump_unknown(struct state * state,void * buf,unsigned start,unsigned n)146 dump_unknown(struct state *state, void *buf, unsigned start, unsigned n)
147 {
148 uint32_t *ptr = buf;
149 uint8_t *ascii = buf;
150
151 for (unsigned i = 0; i < n; i++) {
152 uint32_t d = ptr[i];
153
154 if (dump_offsets)
155 printf("%08x:", (uint32_t)((char *)&ptr[i] - state->buf));
156
157 printf("%s %04x:\t%08x", tab(state->lvl), start + i * 4, d);
158
159 printf("\t|");
160 for (unsigned j = 0; j < 4; j++) {
161 uint8_t c = *(ascii++);
162 printf("%c", (isascii(c) && !iscntrl(c)) ? c : '.');
163 }
164 printf("|\t%f", uif(d));
165
166 /* TODO maybe scan for first non-null and non-ascii char starting from
167 * end of shader binary to (roughly) establish the start of the string
168 * table.. that would be a bit better filter for deciding if something
169 * might be a pointer into the string table. Also, the previous char
170 * to what it points to should probably be null.
171 */
172 if ((d < state->sz) && isascii(state->buf[d]) &&
173 (strlen(&state->buf[d]) > 2) && isascii(state->buf[d + 1]))
174 printf("\t<== %s", &state->buf[d]);
175
176 printf("\n");
177 }
178 }
179
180 struct PACKED header {
181 uint32_t version; /* I guess, always b10bcace ? */
182 uint32_t unk_0004_0014[5];
183 uint32_t size;
184 uint32_t size2; /* just to be sure? */
185 uint32_t unk_0020_0020[1];
186 uint32_t
187 chksum; /* I guess? Small changes seem to result in big diffs here */
188 uint32_t unk_0028_0050[11];
189 uint32_t fs_info; /* offset of FS shader_info section */
190 uint32_t unk_0058_0090[15];
191 uint32_t vs_info; /* offset of VS shader_info section */
192 uint32_t unk_0098_00b0[7];
193 uint32_t vs_info2; /* offset of VS shader_info section (again?) */
194 uint32_t unk_00b8_0110[23];
195 uint32_t bs_info; /* offset of binning shader_info section */
196 };
197
198 static void
decode_header(struct state * state,struct header * hdr)199 decode_header(struct state *state, struct header *hdr)
200 {
201 X(hdr, version);
202 U(hdr, 0004, 0014);
203 X(hdr, size);
204 X(hdr, size2);
205 U(hdr, 0020, 0020);
206 X(hdr, chksum);
207 U(hdr, 0028, 0050);
208 state->shader_type = "FRAG";
209 O(hdr, fs_info, shader_info);
210 U(hdr, 0058, 0090);
211 state->shader_type = "VERT";
212 O(hdr, vs_info, shader_info);
213 U(hdr, 0098, 00b0);
214 assert(hdr->vs_info ==
215 hdr->vs_info2); /* not sure what this if it is ever different */
216 X(hdr, vs_info2);
217 U(hdr, 00b8, 0110);
218 state->shader_type = "BVERT";
219 O(hdr, bs_info, shader_info);
220
221 /* not sure how much of the rest of contents before start of fs_info
222 * is the header, vs other things.. just dump it all as unknown for
223 * now:
224 */
225 dump_unknown(state, (void *)hdr + sizeof(*hdr), sizeof(*hdr),
226 (hdr->fs_info - sizeof(*hdr)) / 4);
227 }
228
229 struct PACKED shader_entry_point {
230 /* entry point name, ie. "main" of TBD length, followed by unknown */
231 char name[8];
232 };
233
234 static void
decode_shader_entry_point(struct state * state,struct shader_entry_point * e)235 decode_shader_entry_point(struct state *state, struct shader_entry_point *e)
236 {
237 S(e, name);
238 }
239
240 struct PACKED shader_config {
241 uint32_t unk_0000_0008[3];
242 uint32_t full_regs;
243 uint32_t half_regs;
244 };
245
246 static void
decode_shader_config(struct state * state,struct shader_config * cfg)247 decode_shader_config(struct state *state, struct shader_config *cfg)
248 {
249 U(cfg, 0000, 0008);
250 D(cfg, full_regs);
251 D(cfg, half_regs);
252
253 state->full_regs = cfg->full_regs;
254 state->half_regs = cfg->half_regs;
255
256 /* dump reset of unknown (size differs btwn versions) */
257 dump_unknown(state, (void *)cfg + sizeof(*cfg), sizeof(*cfg),
258 (state->desc_size - sizeof(*cfg)) / 4);
259 }
260
261 struct PACKED shader_io_block {
262 /* name of TBD length followed by unknown.. 42 dwords total */
263 char name[20];
264 uint32_t unk_0014_00a4[37];
265 };
266
267 static void
decode_shader_io_block(struct state * state,struct shader_io_block * io)268 decode_shader_io_block(struct state *state, struct shader_io_block *io)
269 {
270 S(io, name);
271 U(io, 0014, 00a4);
272 }
273
274 struct PACKED shader_constant_block {
275 uint32_t value;
276 uint32_t unk_0004_000c[3];
277 uint32_t regid;
278 uint32_t unk_0014_0024[5];
279 };
280
281 static void
decode_shader_constant_block(struct state * state,struct shader_constant_block * c)282 decode_shader_constant_block(struct state *state,
283 struct shader_constant_block *c)
284 {
285 F(c, value);
286 U(c, 0004, 000c);
287 R(c, regid, 'c');
288 U(c, 0014, 0024);
289 }
290
/* Values of shader_descriptor_block::type, ie. the kind of record a
 * descriptor refers to.
 *
 * The name is the enum's tag.  It used to trail the closing brace, which
 * declared an unused global variable of an anonymous enum type rather than
 * naming the type.
 */
enum shader_info_block_type {
   ENTRY_POINT = 0,   /* shader_entry_point */
   SHADER_CONFIG = 1, /* XXX placeholder name */
   SHADER_INPUT = 2,  /* shader_io_block */
   SHADER_OUTPUT = 3, /* shader_io_block */
   CONSTANTS = 6,     /* shader_constant_block */
   INTERNAL = 8,      /* internal input, like bary.f coord */
   SHADER = 10,
};
300
301 /* Refers to location of some type of records, with an offset relative to
302 * start of shader_info block.
303 */
304 struct PACKED shader_descriptor_block {
305 uint32_t type; /* block type */
306 uint32_t offset; /* offset (relative to start of shader_info block) */
307 uint32_t size; /* size in bytes */
308 uint32_t count; /* number of records */
309 uint32_t unk_0010_0010[1];
310 };
311
312 static void
decode_shader_descriptor_block(struct state * state,struct shader_descriptor_block * blk)313 decode_shader_descriptor_block(struct state *state,
314 struct shader_descriptor_block *blk)
315 {
316 D(blk, type);
317 X(blk, offset);
318 D(blk, size);
319 D(blk, count);
320 U(blk, 0010, 0010);
321
322 /* offset relative to current shader block: */
323 void *ptr = state->shader + blk->offset;
324
325 if (blk->count == 0) {
326 assert(blk->size == 0);
327 } else {
328 assert((blk->size % blk->count) == 0);
329 }
330
331 state->desc_size = blk->size / blk->count;
332 state->lvl++;
333 for (unsigned i = 0; i < blk->count; i++) {
334 switch (blk->type) {
335 case ENTRY_POINT:
336 printf("%sentry point %u:\n", tab(state->lvl - 1), i);
337 decode_shader_entry_point(state, ptr);
338 break;
339 case SHADER_CONFIG:
340 printf("%sconfig %u:\n", tab(state->lvl - 1), i);
341 decode_shader_config(state, ptr);
342 break;
343 case SHADER_INPUT:
344 printf("%sinput %u:\n", tab(state->lvl - 1), i);
345 decode_shader_io_block(state, ptr);
346 break;
347 case SHADER_OUTPUT:
348 printf("%soutput %u:\n", tab(state->lvl - 1), i);
349 decode_shader_io_block(state, ptr);
350 break;
351 case INTERNAL:
352 printf("%sinternal input %u:\n", tab(state->lvl - 1), i);
353 decode_shader_io_block(state, ptr);
354 break;
355 case CONSTANTS:
356 printf("%sconstant %u:\n", tab(state->lvl - 1), i);
357 decode_shader_constant_block(state, ptr);
358 break;
359 case SHADER: {
360 struct shader_stats stats;
361 printf("%sshader %u:\n", tab(state->lvl - 1), i);
362 disasm_a3xx_stat(ptr, blk->size / 4, state->lvl, stdout, gpu_id,
363 &stats);
364 if (shaderdb) {
365 unsigned dwords = 2 * stats.instlen;
366
367 if (gpu_id >= 400) {
368 dwords = ALIGN(dwords, 16 * 2);
369 } else {
370 dwords = ALIGN(dwords, 4 * 2);
371 }
372
373 unsigned half_regs = state->half_regs;
374 unsigned full_regs = state->full_regs;
375
376 /* On a6xx w/ merged/conflicting half and full regs, the
377 * full_regs footprint will be max of full_regs and half
378 * of half_regs.. we only care about which value is higher.
379 */
380 if (gpu_id >= 600) {
381 /* footprint of half_regs in units of full_regs: */
382 unsigned half_full = (half_regs + 1) / 2;
383 if (half_full > full_regs)
384 full_regs = half_full;
385 half_regs = 0;
386 }
387
388 fprintf(stderr,
389 "%s shader: %u inst, %u nops, %u non-nops, %u dwords, "
390 "%u half, %u full, %u constlen, "
391 "%u (ss), %u (sy), %d max_sun, %d loops\n",
392 state->shader_type, stats.instructions, stats.nops,
393 stats.instructions - stats.nops, dwords, half_regs,
394 full_regs, stats.constlen, stats.ss, stats.sy, 0,
395 0); /* max_sun or loops not possible */
396 }
397 /* this is a special case in a way, blk->count is # of
398 * instructions but disasm_a3xx() decodes all instructions,
399 * so just bail.
400 */
401 i = blk->count;
402 break;
403 }
404 default:
405 dump_unknown(state, ptr, 0, state->desc_size / 4);
406 break;
407 }
408 ptr += state->desc_size;
409 }
410 state->lvl--;
411 }
412
413 /* there looks like one of these per shader, followed by "main" and
414 * some more info, and then the shader itself.
415 */
416 struct PACKED shader_info {
417 uint32_t unk_0000_0010[5];
418 uint32_t desc_off; /* offset to first descriptor block */
419 uint32_t num_blocks;
420 };
421
422 static void
decode_shader_info(struct state * state,struct shader_info * info)423 decode_shader_info(struct state *state, struct shader_info *info)
424 {
425 assert((info->desc_off % 4) == 0);
426
427 U(info, 0000, 0010);
428 X(info, desc_off);
429 D(info, num_blocks);
430
431 dump_unknown(state, &info[1], 0, (info->desc_off - sizeof(*info)) / 4);
432
433 state->shader = info;
434
435 struct shader_descriptor_block *blocks = ((void *)info) + info->desc_off;
436 for (unsigned i = 0; i < info->num_blocks; i++) {
437 printf("%sdescriptor %u:\n", tab(state->lvl), i);
438 state->lvl++;
439 decode_shader_descriptor_block(state, &blocks[i]);
440 state->lvl--;
441 }
442 }
443
444 static void
dump_program(struct state * state)445 dump_program(struct state *state)
446 {
447 struct header *hdr = (void *)state->buf;
448
449 if (dump_full)
450 dump_unknown(state, state->buf, 0, state->sz / 4);
451
452 decode_header(state, hdr);
453 }
454
455 int
main(int argc,char ** argv)456 main(int argc, char **argv)
457 {
458 enum rd_sect_type type = RD_NONE;
459 enum debug_t debug = PRINT_RAW | PRINT_STATS;
460 void *buf = NULL;
461 int sz;
462 struct io *io;
463 int raw_program = 0;
464
465 /* lame argument parsing: */
466
467 while (1) {
468 if ((argc > 1) && !strcmp(argv[1], "--verbose")) {
469 debug |= PRINT_RAW | PRINT_VERBOSE;
470 argv++;
471 argc--;
472 continue;
473 }
474 if ((argc > 1) && !strcmp(argv[1], "--expand")) {
475 debug |= EXPAND_REPEAT;
476 argv++;
477 argc--;
478 continue;
479 }
480 if ((argc > 1) && !strcmp(argv[1], "--full")) {
481 /* only short dump, original shader, symbol table, and disassembly */
482 dump_full = 1;
483 argv++;
484 argc--;
485 continue;
486 }
487 if ((argc > 1) && !strcmp(argv[1], "--dump-offsets")) {
488 dump_offsets = 1;
489 argv++;
490 argc--;
491 continue;
492 }
493 if ((argc > 1) && !strcmp(argv[1], "--raw")) {
494 raw_program = 1;
495 argv++;
496 argc--;
497 continue;
498 }
499 if ((argc > 1) && !strcmp(argv[1], "--shaderdb")) {
500 shaderdb = 1;
501 argv++;
502 argc--;
503 continue;
504 }
505 break;
506 }
507
508 if (argc != 2) {
509 fprintf(stderr, "usage: pgmdump2 [--verbose] [--expand] [--full] "
510 "[--dump-offsets] [--raw] [--shaderdb] testlog.rd\n");
511 return -1;
512 }
513
514 disasm_a3xx_set_debug(debug);
515
516 infile = argv[1];
517
518 io = io_open(infile);
519 if (!io) {
520 fprintf(stderr, "could not open: %s\n", infile);
521 return -1;
522 }
523
524 if (raw_program) {
525 io_readn(io, &sz, 4);
526 free(buf);
527
528 /* note: allow hex dumps to go a bit past the end of the buffer..
529 * might see some garbage, but better than missing the last few bytes..
530 */
531 buf = calloc(1, sz + 3);
532 io_readn(io, buf + 4, sz);
533 (*(int *)buf) = sz;
534
535 struct state state = {
536 .buf = buf,
537 .sz = sz,
538 };
539 printf("############################################################\n");
540 printf("program:\n");
541 dump_program(&state);
542 printf("############################################################\n");
543 return 0;
544 }
545
546 /* figure out what sort of input we are dealing with: */
547 if (!(check_extension(infile, ".rd") || check_extension(infile, ".rd.gz"))) {
548 int ret;
549 buf = calloc(1, 100 * 1024);
550 ret = io_readn(io, buf, 100 * 1024);
551 if (ret < 0) {
552 fprintf(stderr, "error: %m");
553 return -1;
554 }
555 return disasm_a3xx(buf, ret / 4, 0, stdout, gpu_id);
556 }
557
558 while ((io_readn(io, &type, sizeof(type)) > 0) &&
559 (io_readn(io, &sz, 4) > 0)) {
560 free(buf);
561
562 /* note: allow hex dumps to go a bit past the end of the buffer..
563 * might see some garbage, but better than missing the last few bytes..
564 */
565 buf = calloc(1, sz + 3);
566 io_readn(io, buf, sz);
567
568 switch (type) {
569 case RD_TEST:
570 if (dump_full)
571 printf("test: %s\n", (char *)buf);
572 break;
573 case RD_VERT_SHADER:
574 printf("vertex shader:\n%s\n", (char *)buf);
575 break;
576 case RD_FRAG_SHADER:
577 printf("fragment shader:\n%s\n", (char *)buf);
578 break;
579 case RD_PROGRAM: {
580 struct state state = {
581 .buf = buf,
582 .sz = sz,
583 };
584 printf(
585 "############################################################\n");
586 printf("program:\n");
587 dump_program(&state);
588 printf(
589 "############################################################\n");
590 break;
591 }
592 case RD_GPU_ID:
593 gpu_id = *((unsigned int *)buf);
594 printf("gpu_id: %d\n", gpu_id);
595 break;
596 default:
597 break;
598 }
599 }
600
601 io_close(io);
602
603 return 0;
604 }
605