1 /*
2 * Copyright (c) 2018 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
/*
 * Decoder for "new" GL_OES_get_program_binary format.
 *
 * Overall structure is:
 *
 *   - A header at the top which contains, amongst other things, the
 *     offsets of the per shader stage sections.
 *   - Each per shader stage section (shader_info) starts with a header,
 *     followed by a variable-length list of descriptors.  Each
 *     descriptor has a type/count/size plus an offset, relative to the
 *     start of the shader_info section, to where the data is found.
 */
36
37 #include <assert.h>
38 #include <ctype.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stdint.h>
42 #include <unistd.h>
43 #include <sys/types.h>
44 #include <sys/stat.h>
45 #include <stddef.h>
46 #include <fcntl.h>
47 #include <string.h>
48
49 #include "redump.h"
50 #include "disasm.h"
51 #include "io.h"
52 #include "util.h"
53
/* Path of the input file, taken from the command line in main(). */
const char *infile;

/* --full: hex-dump the whole program binary before decoding it. */
static int dump_full;
/* --dump-offsets: prefix decoded fields with their offset in the buffer. */
static int dump_offsets;
/* GPU id handed to the disassembler; replaced when an RD_GPU_ID section is seen. */
static int gpu_id = 320;
/* --shaderdb: output shaderdb style traces to stderr. */
static int shaderdb;
59
/* Decoder context that is threaded through all of the decode_*() helpers. */
struct state {
	char *buf;   /* start of the program binary being decoded */
	int sz;      /* size of buf in bytes */
	int lvl;     /* current nesting level, used to indent the output */

	/* Start of the shader_info section currently being decoded.  Some
	 * offsets are relative to this rather than to the start of buf.
	 */
	void *shader;

	/* Size in bytes of one record of the descriptor block currently
	 * being decoded (block size divided by record count).
	 */
	int desc_size;

	/* Stage label ("FRAG", "VERT", ...) used in the shaderdb output. */
	const char *shader_type;

	/* Register counts taken from the most recent shader_config record. */
	int full_regs;
	int half_regs;
};
77
/* The structs below overlay the file contents, so no padding is allowed. */
#define PACKED __attribute__((__packed__))

/*
 * Field decode helpers.  They all expect a `struct state *state` to be in
 * scope at the point of use and print one line per field, indented
 * according to state->lvl.  Macro parameters are parenthesized so that any
 * pointer expression can be passed as `s`.
 */

/* when --dump-offsets is given, print the offset of field within state->buf */
#define OFF(field) do { \
		if (dump_offsets) \
			printf("%08x: ", (uint32_t)((char *)&(field) - state->buf)); \
	} while (0)

/* decode field as hex */
#define X(s, field) do { \
		OFF((s)->field); \
		printf("%s%12s:\t0x%x\n", tab(state->lvl), #field, (s)->field); \
	} while (0)

/* decode field as digit */
#define D(s, field) do { \
		OFF((s)->field); \
		printf("%s%12s:\t%u\n", tab(state->lvl), #field, (s)->field); \
	} while (0)

/* decode field as float/hex */
#define F(s, field) do { \
		OFF((s)->field); \
		printf("%s%12s:\t%f (0x%x)\n", tab(state->lvl), #field, \
				d2f((s)->field), (s)->field); \
	} while (0)

/* decode field as register: (type is 'r' or 'c'); the low two bits select
 * the component and the remaining bits are the register number
 */
#define R(s, field, type) do { \
		OFF((s)->field); \
		printf("%s%12s:\t%c%u.%c\n", tab(state->lvl), #field, (type), \
				((s)->field >> 2), "xyzw"[(s)->field & 0x3]); \
	} while (0)
110
/* decode inline string (presumably null terminated?)
 *
 * field must be a char array: the print is limited to sizeof(field) bytes
 * so that a name which fills the whole array without a terminating NUL
 * does not make printf() read past the end of the field.
 */
#define S(s, field) do { \
		OFF((s)->field); \
		printf("%s%12s:\t%.*s\n", tab(state->lvl), #field, \
				(int)sizeof((s)->field), (s)->field); \
	} while (0)

/* decode string-table string (not implemented yet, any use fails to compile) */
#define T(s, field) TODO

/* decode field as unknown: start and end are the hex offsets (without the
 * 0x prefix) of the first and last dword, matching the unk_<start>_<end>
 * member name
 */
#define U(s, start, end) \
	dump_unknown(state, (s)->unk_ ## start ## _ ## end, 0x ## start, (4 + 0x ## end - 0x ## start) / 4)

/* decode field as offset (from the start of the buffer) to another section,
 * which is decoded one indentation level deeper by decode_<type>()
 */
#define O(s, field, type) do { \
		X(s, field); \
		assert((s)->field < (uint32_t)state->sz); \
		void *_sect = &state->buf[(s)->field]; \
		state->lvl++; \
		decode_ ## type (state, _sect); \
		state->lvl--; \
	} while (0)
133
134 struct shader_info;
135 static void decode_shader_info(struct state *state, struct shader_info *info);
136
dump_unknown(struct state * state,void * buf,unsigned start,unsigned n)137 static void dump_unknown(struct state *state, void *buf, unsigned start, unsigned n)
138 {
139 uint32_t *ptr = buf;
140 uint8_t *ascii = buf;
141
142 for (unsigned i = 0; i < n; i++) {
143 uint32_t d = ptr[i];
144
145 if (dump_offsets)
146 printf("%08x:", (uint32_t)((char *)&ptr[i] - state->buf));
147
148 printf("%s %04x:\t%08x", tab(state->lvl), start + i * 4, d);
149
150 printf("\t|");
151 for (unsigned j = 0; j < 4; j++) {
152 uint8_t c = *(ascii++);
153 printf("%c", (isascii(c) && !iscntrl(c)) ? c : '.');
154 }
155 printf("|\t%f", d2f(d));
156
157 /* TODO maybe scan for first non-null and non-ascii char starting from
158 * end of shader binary to (roughly) establish the start of the string
159 * table.. that would be a bit better filter for deciding if something
160 * might be a pointer into the string table. Also, the previous char
161 * to what it points to should probably be null.
162 */
163 if ((d < state->sz) &&
164 isascii(state->buf[d]) &&
165 (strlen(&state->buf[d]) > 2) &&
166 isascii(state->buf[d+1]))
167 printf("\t<== %s", &state->buf[d]);
168
169 printf("\n");
170 }
171 }
172
173 struct PACKED header {
174 uint32_t version; /* I guess, always b10bcace ? */
175 uint32_t unk_0004_0014[5];
176 uint32_t size;
177 uint32_t size2; /* just to be sure? */
178 uint32_t unk_0020_0020[1];
179 uint32_t chksum; /* I guess? Small changes seem to result in big diffs here */
180 uint32_t unk_0028_0050[11];
181 uint32_t fs_info; /* offset of FS shader_info section */
182 uint32_t unk_0058_0090[15];
183 uint32_t vs_info; /* offset of VS shader_info section */
184 uint32_t unk_0098_00b0[7];
185 uint32_t vs_info2; /* offset of VS shader_info section (again?) */
186 uint32_t unk_00b8_0110[23];
187 uint32_t bs_info; /* offset of binning shader_info section */
188 };
189
decode_header(struct state * state,struct header * hdr)190 static void decode_header(struct state *state, struct header *hdr)
191 {
192 X(hdr, version);
193 U(hdr, 0004, 0014);
194 X(hdr, size);
195 X(hdr, size2);
196 U(hdr, 0020, 0020);
197 X(hdr, chksum);
198 U(hdr, 0028, 0050);
199 state->shader_type = "FRAG";
200 O(hdr, fs_info, shader_info);
201 U(hdr, 0058, 0090);
202 state->shader_type = "VERT";
203 O(hdr, vs_info, shader_info);
204 U(hdr, 0098, 00b0);
205 assert(hdr->vs_info == hdr->vs_info2); /* not sure what this if it is ever different */
206 X(hdr, vs_info2);
207 U(hdr, 00b8, 0110);
208 state->shader_type = "BVERT";
209 O(hdr, bs_info, shader_info);
210
211 /* not sure how much of the rest of contents before start of fs_info
212 * is the header, vs other things.. just dump it all as unknown for
213 * now:
214 */
215 dump_unknown(state, (void *)hdr + sizeof(*hdr),
216 sizeof(*hdr), (hdr->fs_info - sizeof(*hdr)) / 4);
217 }
218
219 struct PACKED shader_entry_point {
220 /* entry point name, ie. "main" of TBD length, followed by unknown */
221 char name[8];
222 };
223
decode_shader_entry_point(struct state * state,struct shader_entry_point * e)224 static void decode_shader_entry_point(struct state *state,
225 struct shader_entry_point *e)
226 {
227 S(e, name);
228 }
229
230 struct PACKED shader_config {
231 uint32_t unk_0000_0008[3];
232 uint32_t full_regs;
233 uint32_t half_regs;
234 };
235
decode_shader_config(struct state * state,struct shader_config * cfg)236 static void decode_shader_config(struct state *state, struct shader_config *cfg)
237 {
238 U(cfg, 0000, 0008);
239 D(cfg, full_regs);
240 D(cfg, half_regs);
241
242 state->full_regs = cfg->full_regs;
243 state->half_regs = cfg->half_regs;
244
245 /* dump reset of unknown (size differs btwn versions) */
246 dump_unknown(state, (void *)cfg + sizeof(*cfg), sizeof(*cfg),
247 (state->desc_size - sizeof(*cfg))/4);
248 }
249
250 struct PACKED shader_io_block {
251 /* name of TBD length followed by unknown.. 42 dwords total */
252 char name[20];
253 uint32_t unk_0014_00a4[37];
254 };
255
decode_shader_io_block(struct state * state,struct shader_io_block * io)256 static void decode_shader_io_block(struct state *state,
257 struct shader_io_block *io)
258 {
259 S(io, name);
260 U(io, 0014, 00a4);
261 }
262
263 struct PACKED shader_constant_block {
264 uint32_t value;
265 uint32_t unk_0004_000c[3];
266 uint32_t regid;
267 uint32_t unk_0014_0024[5];
268 };
269
decode_shader_constant_block(struct state * state,struct shader_constant_block * c)270 static void decode_shader_constant_block(struct state *state,
271 struct shader_constant_block *c)
272 {
273 F(c, value);
274 U(c, 0004, 000c);
275 R(c, regid, 'c');
276 U(c, 0014, 0024);
277 }
278
/*
 * Values of the type field of a shader_descriptor_block; the comments name
 * the record struct used to decode each kind of block.
 *
 * The name is an enum tag.  It used to trail the closing brace, which
 * declared an unused global variable of anonymous enum type instead of
 * naming the type.
 */
enum shader_info_block_type {
	ENTRY_POINT   = 0,   /* shader_entry_point */
	SHADER_CONFIG = 1,   /* XXX placeholder name */
	SHADER_INPUT  = 2,   /* shader_io_block */
	SHADER_OUTPUT = 3,   /* shader_io_block */
	CONSTANTS     = 6,   /* shader_constant_block */
	INTERNAL      = 8,   /* internal input, like bary.f coord */
	SHADER        = 10,
};
288
289 /* Refers to location of some type of records, with an offset relative to
290 * start of shader_info block.
291 */
292 struct PACKED shader_descriptor_block {
293 uint32_t type; /* block type */
294 uint32_t offset; /* offset (relative to start of shader_info block) */
295 uint32_t size; /* size in bytes */
296 uint32_t count; /* number of records */
297 uint32_t unk_0010_0010[1];
298 };
299
decode_shader_descriptor_block(struct state * state,struct shader_descriptor_block * blk)300 static void decode_shader_descriptor_block(struct state *state,
301 struct shader_descriptor_block *blk)
302 {
303 D(blk, type);
304 X(blk, offset);
305 D(blk, size);
306 D(blk, count);
307 U(blk, 0010, 0010);
308
309 /* offset relative to current shader block: */
310 void *ptr = state->shader + blk->offset;
311
312 if (blk->count == 0) {
313 assert(blk->size == 0);
314 } else {
315 assert((blk->size % blk->count) == 0);
316 }
317
318 state->desc_size = blk->size / blk->count;
319 state->lvl++;
320 for (unsigned i = 0; i < blk->count; i++) {
321 switch (blk->type) {
322 case ENTRY_POINT:
323 printf("%sentry point %u:\n", tab(state->lvl-1), i);
324 decode_shader_entry_point(state, ptr);
325 break;
326 case SHADER_CONFIG:
327 printf("%sconfig %u:\n", tab(state->lvl-1), i);
328 decode_shader_config(state, ptr);
329 break;
330 case SHADER_INPUT:
331 printf("%sinput %u:\n", tab(state->lvl-1), i);
332 decode_shader_io_block(state, ptr);
333 break;
334 case SHADER_OUTPUT:
335 printf("%soutput %u:\n", tab(state->lvl-1), i);
336 decode_shader_io_block(state, ptr);
337 break;
338 case INTERNAL:
339 printf("%sinternal input %u:\n", tab(state->lvl-1), i);
340 decode_shader_io_block(state, ptr);
341 break;
342 case CONSTANTS:
343 printf("%sconstant %u:\n", tab(state->lvl-1), i);
344 decode_shader_constant_block(state, ptr);
345 break;
346 case SHADER: {
347 struct shader_stats stats;
348 printf("%sshader %u:\n", tab(state->lvl-1), i);
349 disasm_a3xx_stat(ptr, blk->size/4, state->lvl, stdout, gpu_id, &stats);
350 if (shaderdb) {
351 unsigned dwords = 2 * stats.instlen;
352
353 if (gpu_id >= 400) {
354 dwords = ALIGN(dwords, 16 * 2);
355 } else {
356 dwords = ALIGN(dwords, 4 * 2);
357 }
358
359 unsigned half_regs = state->half_regs;
360 unsigned full_regs = state->full_regs;
361
362 /* On a6xx w/ merged/conflicting half and full regs, the
363 * full_regs footprint will be max of full_regs and half
364 * of half_regs.. we only care about which value is higher.
365 */
366 if (gpu_id >= 600) {
367 /* footprint of half_regs in units of full_regs: */
368 unsigned half_full = (half_regs + 1) / 2;
369 if (half_full > full_regs)
370 full_regs = half_full;
371 half_regs = 0;
372 }
373
374 fprintf(stderr,
375 "%s shader: %u inst, %u nops, %u non-nops, %u dwords, "
376 "%u half, %u full, %u constlen, "
377 "%u (ss), %u (sy), %d max_sun, %d loops\n",
378 state->shader_type, stats.instructions,
379 stats.nops, stats.instructions - stats.nops,
380 dwords, half_regs, full_regs,
381 stats.constlen, stats.ss, stats.sy,
382 0, 0); /* max_sun or loops not possible */
383 }
384 /* this is a special case in a way, blk->count is # of
385 * instructions but disasm_a3xx() decodes all instructions,
386 * so just bail.
387 */
388 i = blk->count;
389 break;
390 }
391 default:
392 dump_unknown(state, ptr, 0, state->desc_size/4);
393 break;
394 }
395 ptr += state->desc_size;
396 }
397 state->lvl--;
398 }
399
400 /* there looks like one of these per shader, followed by "main" and
401 * some more info, and then the shader itself.
402 */
403 struct PACKED shader_info {
404 uint32_t unk_0000_0010[5];
405 uint32_t desc_off; /* offset to first descriptor block */
406 uint32_t num_blocks;
407 };
408
decode_shader_info(struct state * state,struct shader_info * info)409 static void decode_shader_info(struct state *state, struct shader_info *info)
410 {
411 assert((info->desc_off % 4) == 0);
412
413 U(info, 0000, 0010);
414 X(info, desc_off);
415 D(info, num_blocks);
416
417 dump_unknown(state, &info[1], 0, (info->desc_off - sizeof(*info))/4);
418
419 state->shader = info;
420
421 struct shader_descriptor_block *blocks = ((void *)info) + info->desc_off;
422 for (unsigned i = 0; i < info->num_blocks; i++) {
423 printf("%sdescriptor %u:\n", tab(state->lvl), i);
424 state->lvl++;
425 decode_shader_descriptor_block(state, &blocks[i]);
426 state->lvl--;
427 }
428 }
429
dump_program(struct state * state)430 static void dump_program(struct state *state)
431 {
432 struct header *hdr = (void *)state->buf;
433
434 if (dump_full)
435 dump_unknown(state, state->buf, 0, state->sz/4);
436
437 decode_header(state, hdr);
438 }
439
main(int argc,char ** argv)440 int main(int argc, char **argv)
441 {
442 enum rd_sect_type type = RD_NONE;
443 enum debug_t debug = PRINT_RAW | PRINT_STATS;
444 void *buf = NULL;
445 int sz;
446 struct io *io;
447 int raw_program = 0;
448
449 /* lame argument parsing: */
450
451 while (1) {
452 if ((argc > 1) && !strcmp(argv[1], "--verbose")) {
453 debug |= PRINT_RAW | PRINT_VERBOSE;
454 argv++;
455 argc--;
456 continue;
457 }
458 if ((argc > 1) && !strcmp(argv[1], "--expand")) {
459 debug |= EXPAND_REPEAT;
460 argv++;
461 argc--;
462 continue;
463 }
464 if ((argc > 1) && !strcmp(argv[1], "--full")) {
465 /* only short dump, original shader, symbol table, and disassembly */
466 dump_full = 1;
467 argv++;
468 argc--;
469 continue;
470 }
471 if ((argc > 1) && !strcmp(argv[1], "--dump-offsets")) {
472 dump_offsets = 1;
473 argv++;
474 argc--;
475 continue;
476 }
477 if ((argc > 1) && !strcmp(argv[1], "--raw")) {
478 raw_program = 1;
479 argv++;
480 argc--;
481 continue;
482 }
483 if ((argc > 1) && !strcmp(argv[1], "--shaderdb")) {
484 shaderdb = 1;
485 argv++;
486 argc--;
487 continue;
488 }
489 break;
490 }
491
492 if (argc != 2) {
493 fprintf(stderr, "usage: pgmdump2 [--verbose] [--expand] [--full] [--dump-offsets] [--raw] [--shaderdb] testlog.rd\n");
494 return -1;
495 }
496
497 disasm_a3xx_set_debug(debug);
498
499 infile = argv[1];
500
501 io = io_open(infile);
502 if (!io) {
503 fprintf(stderr, "could not open: %s\n", infile);
504 return -1;
505 }
506
507 if (raw_program)
508 {
509 io_readn(io, &sz, 4);
510 free(buf);
511
512 /* note: allow hex dumps to go a bit past the end of the buffer..
513 * might see some garbage, but better than missing the last few bytes..
514 */
515 buf = calloc(1, sz + 3);
516 io_readn(io, buf + 4, sz);
517 (*(int*)buf) = sz;
518
519 struct state state = {
520 .buf = buf,
521 .sz = sz,
522 };
523 printf("############################################################\n");
524 printf("program:\n");
525 dump_program(&state);
526 printf("############################################################\n");
527 return 0;
528 }
529
530 /* figure out what sort of input we are dealing with: */
531 if (!(check_extension(infile, ".rd") || check_extension(infile, ".rd.gz"))) {
532 int ret;
533 buf = calloc(1, 100 * 1024);
534 ret = io_readn(io, buf, 100 * 1024);
535 if (ret < 0) {
536 fprintf(stderr, "error: %m");
537 return -1;
538 }
539 return disasm_a3xx(buf, ret/4, 0, stdout, gpu_id);
540 }
541
542 while ((io_readn(io, &type, sizeof(type)) > 0) && (io_readn(io, &sz, 4) > 0)) {
543 free(buf);
544
545 /* note: allow hex dumps to go a bit past the end of the buffer..
546 * might see some garbage, but better than missing the last few bytes..
547 */
548 buf = calloc(1, sz + 3);
549 io_readn(io, buf, sz);
550
551 switch(type) {
552 case RD_TEST:
553 if (dump_full)
554 printf("test: %s\n", (char *)buf);
555 break;
556 case RD_VERT_SHADER:
557 printf("vertex shader:\n%s\n", (char *)buf);
558 break;
559 case RD_FRAG_SHADER:
560 printf("fragment shader:\n%s\n", (char *)buf);
561 break;
562 case RD_PROGRAM: {
563 struct state state = {
564 .buf = buf,
565 .sz = sz,
566 };
567 printf("############################################################\n");
568 printf("program:\n");
569 dump_program(&state);
570 printf("############################################################\n");
571 break;
572 }
573 case RD_GPU_ID:
574 gpu_id = *((unsigned int *)buf);
575 printf("gpu_id: %d\n", gpu_id);
576 break;
577 default:
578 break;
579 }
580 }
581
582 io_close(io);
583
584 return 0;
585 }
586