1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "common/intel_decoder.h"
25 #include "intel_disasm.h"
26 #include "util/macros.h"
27 #include "main/macros.h" /* Needed for ROUND_DOWN_TO */
28
29 #include <string.h>
30
31 void
intel_batch_decode_ctx_init(struct intel_batch_decode_ctx * ctx,const struct intel_device_info * devinfo,FILE * fp,enum intel_batch_decode_flags flags,const char * xml_path,struct intel_batch_decode_bo (* get_bo)(void *,bool,uint64_t),unsigned (* get_state_size)(void *,uint64_t,uint64_t),void * user_data)32 intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx,
33 const struct intel_device_info *devinfo,
34 FILE *fp, enum intel_batch_decode_flags flags,
35 const char *xml_path,
36 struct intel_batch_decode_bo (*get_bo)(void *,
37 bool,
38 uint64_t),
39 unsigned (*get_state_size)(void *, uint64_t,
40 uint64_t),
41 void *user_data)
42 {
43 memset(ctx, 0, sizeof(*ctx));
44
45 ctx->devinfo = *devinfo;
46 ctx->get_bo = get_bo;
47 ctx->get_state_size = get_state_size;
48 ctx->user_data = user_data;
49 ctx->fp = fp;
50 ctx->flags = flags;
51 ctx->max_vbo_decoded_lines = -1; /* No limit! */
52 ctx->engine = I915_ENGINE_CLASS_RENDER;
53
54 if (xml_path == NULL)
55 ctx->spec = intel_spec_load(devinfo);
56 else
57 ctx->spec = intel_spec_load_from_path(devinfo, xml_path);
58 }
59
/* Release resources owned by a batch decoder context — currently just the
 * genxml spec loaded by intel_batch_decode_ctx_init().
 */
void
intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx *ctx)
{
   intel_spec_destroy(ctx->spec);
}
65
/* ANSI escape sequences used to colorize output when the
 * INTEL_BATCH_DECODE_IN_COLOR flag is set.
 */
#define CSI "\e["
#define RED_COLOR CSI "31m"
#define BLUE_HEADER CSI "0;44m" CSI "1;37m"
#define GREEN_HEADER CSI "1;42m"
#define NORMAL CSI "0m"
71
72 static void
ctx_print_group(struct intel_batch_decode_ctx * ctx,struct intel_group * group,uint64_t address,const void * map)73 ctx_print_group(struct intel_batch_decode_ctx *ctx,
74 struct intel_group *group,
75 uint64_t address, const void *map)
76 {
77 intel_print_group(ctx->fp, group, address, map, 0,
78 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) != 0);
79 }
80
81 static struct intel_batch_decode_bo
ctx_get_bo(struct intel_batch_decode_ctx * ctx,bool ppgtt,uint64_t addr)82 ctx_get_bo(struct intel_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr)
83 {
84 if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0)) {
85 /* On Broadwell and above, we have 48-bit addresses which consume two
86 * dwords. Some packets require that these get stored in a "canonical
87 * form" which means that bit 47 is sign-extended through the upper
88 * bits. In order to correctly handle those aub dumps, we need to mask
89 * off the top 16 bits.
90 */
91 addr &= (~0ull >> 16);
92 }
93
94 struct intel_batch_decode_bo bo = ctx->get_bo(ctx->user_data, ppgtt, addr);
95
96 if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0))
97 bo.addr &= (~0ull >> 16);
98
99 /* We may actually have an offset into the bo */
100 if (bo.map != NULL) {
101 assert(bo.addr <= addr);
102 uint64_t offset = addr - bo.addr;
103 bo.map += offset;
104 bo.addr += offset;
105 bo.size -= offset;
106 }
107
108 return bo;
109 }
110
111 static int
update_count(struct intel_batch_decode_ctx * ctx,uint64_t address,uint64_t base_address,unsigned element_dwords,unsigned guess)112 update_count(struct intel_batch_decode_ctx *ctx,
113 uint64_t address,
114 uint64_t base_address,
115 unsigned element_dwords,
116 unsigned guess)
117 {
118 unsigned size = 0;
119
120 if (ctx->get_state_size)
121 size = ctx->get_state_size(ctx->user_data, address, base_address);
122
123 if (size > 0)
124 return size / (sizeof(uint32_t) * element_dwords);
125
126 /* In the absence of any information, just guess arbitrarily. */
127 return guess;
128 }
129
130 static void
ctx_disassemble_program(struct intel_batch_decode_ctx * ctx,uint32_t ksp,const char * type)131 ctx_disassemble_program(struct intel_batch_decode_ctx *ctx,
132 uint32_t ksp, const char *type)
133 {
134 uint64_t addr = ctx->instruction_base + ksp;
135 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
136 if (!bo.map)
137 return;
138
139 fprintf(ctx->fp, "\nReferenced %s:\n", type);
140 intel_disassemble(&ctx->devinfo, bo.map, 0, ctx->fp);
141 }
142
/* Heuristic to determine whether a uint32_t is probably actually a float
 * (http://stackoverflow.com/a/2953466)
 */

static bool
probably_float(uint32_t bits)
{
   const int biased_exp = (int)((bits >> 23) & 0xff) - 127;
   const uint32_t mantissa = bits & 0x007fffff;

   /* +- 0.0 */
   if (biased_exp == -127 && mantissa == 0)
      return true;

   /* +- 1 billionth to 1 billion */
   if (biased_exp >= -30 && biased_exp <= 30)
      return true;

   /* some value with only a few binary digits */
   return (mantissa & 0x0000ffff) == 0;
}
167
/* Hex/float dump of a buffer's contents.
 *
 * Prints up to 'read_length' bytes (clamped to bo.size and rounded down to a
 * whole dword), 8 dwords per line.  A non-zero 'pitch' (bytes) also breaks
 * the line at every pitch boundary so structured data (e.g. vertices) lines
 * up.  'max_lines' >= 0 limits the number of data lines printed; negative
 * means unlimited.  With INTEL_BATCH_DECODE_FLOATS set, dwords that look
 * like floats (see probably_float()) are printed as floats.
 */
static void
ctx_print_buffer(struct intel_batch_decode_ctx *ctx,
                 struct intel_batch_decode_bo bo,
                 uint32_t read_length,
                 uint32_t pitch,
                 int max_lines)
{
   const uint32_t *dw_end =
      bo.map + ROUND_DOWN_TO(MIN2(bo.size, read_length), 4);

   /* line_count starts at -1 so the newline emitted before the first data
    * row is not charged against max_lines.
    */
   int column_count = 0, pitch_col_count = 0, line_count = -1;
   for (const uint32_t *dw = bo.map; dw < dw_end; dw++) {
      /* NOTE(review): when pitch == 0, "pitch_col_count * 4 == pitch" is
       * true on the very first iteration (0 == 0), producing the leading
       * newline — presumably intentional; confirm before restructuring.
       */
      if (pitch_col_count * 4 == pitch || column_count == 8) {
         fprintf(ctx->fp, "\n");
         column_count = 0;
         if (pitch_col_count * 4 == pitch)
            pitch_col_count = 0;
         line_count++;

         if (max_lines >= 0 && line_count >= max_lines)
            break;
      }
      fprintf(ctx->fp, column_count == 0 ? " " : " ");

      if ((ctx->flags & INTEL_BATCH_DECODE_FLOATS) && probably_float(*dw))
         fprintf(ctx->fp, " %8.2f", *(float *) dw);
      else
         fprintf(ctx->fp, " 0x%08x", *dw);

      column_count++;
      pitch_col_count++;
   }
   fprintf(ctx->fp, "\n");
}
202
203 static struct intel_group *
intel_ctx_find_instruction(struct intel_batch_decode_ctx * ctx,const uint32_t * p)204 intel_ctx_find_instruction(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
205 {
206 return intel_spec_find_instruction(ctx->spec, ctx->engine, p);
207 }
208
209 static void
handle_state_base_address(struct intel_batch_decode_ctx * ctx,const uint32_t * p)210 handle_state_base_address(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
211 {
212 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
213
214 struct intel_field_iterator iter;
215 intel_field_iterator_init(&iter, inst, p, 0, false);
216
217 uint64_t surface_base = 0, dynamic_base = 0, instruction_base = 0;
218 bool surface_modify = 0, dynamic_modify = 0, instruction_modify = 0;
219
220 while (intel_field_iterator_next(&iter)) {
221 if (strcmp(iter.name, "Surface State Base Address") == 0) {
222 surface_base = iter.raw_value;
223 } else if (strcmp(iter.name, "Dynamic State Base Address") == 0) {
224 dynamic_base = iter.raw_value;
225 } else if (strcmp(iter.name, "Instruction Base Address") == 0) {
226 instruction_base = iter.raw_value;
227 } else if (strcmp(iter.name, "Surface State Base Address Modify Enable") == 0) {
228 surface_modify = iter.raw_value;
229 } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) {
230 dynamic_modify = iter.raw_value;
231 } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) {
232 instruction_modify = iter.raw_value;
233 }
234 }
235
236 if (dynamic_modify)
237 ctx->dynamic_base = dynamic_base;
238
239 if (surface_modify)
240 ctx->surface_base = surface_base;
241
242 if (instruction_modify)
243 ctx->instruction_base = instruction_base;
244 }
245
246 static void
handle_binding_table_pool_alloc(struct intel_batch_decode_ctx * ctx,const uint32_t * p)247 handle_binding_table_pool_alloc(struct intel_batch_decode_ctx *ctx,
248 const uint32_t *p)
249 {
250 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
251
252 struct intel_field_iterator iter;
253 intel_field_iterator_init(&iter, inst, p, 0, false);
254
255 uint64_t bt_pool_base = 0;
256 bool bt_pool_enable = false;
257
258 while (intel_field_iterator_next(&iter)) {
259 if (strcmp(iter.name, "Binding Table Pool Base Address") == 0) {
260 bt_pool_base = iter.raw_value;
261 } else if (strcmp(iter.name, "Binding Table Pool Enable") == 0) {
262 bt_pool_enable = iter.raw_value;
263 }
264 }
265
266 if (bt_pool_enable) {
267 ctx->bt_pool_base = bt_pool_base;
268 } else {
269 ctx->bt_pool_base = 0;
270 }
271 }
272
/* Decode and print a binding table.
 *
 * 'offset' is the binding-table pointer from the packet, relative to the
 * binding table pool when one is enabled, otherwise to Surface State Base
 * Address.  'count' is the number of entries, or -1 to query it via the
 * get_state_size() callback (falling back to a guess of 8).
 */
static void
dump_binding_table(struct intel_batch_decode_ctx *ctx,
                   uint32_t offset, int count)
{
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "RENDER_SURFACE_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find RENDER_SURFACE_STATE info\n");
      return;
   }

   /* When 256B binding tables are enabled, we have to shift the offset */
   if (ctx->use_256B_binding_tables)
      offset <<= 3;

   const uint64_t bt_pool_base = ctx->bt_pool_base ? ctx->bt_pool_base :
                                 ctx->surface_base;

   if (count < 0) {
      count = update_count(ctx, bt_pool_base + offset,
                           bt_pool_base, 1, 8);
   }

   /* NOTE(review): this validity check runs after the 256B shift above, so
    * shifted offsets are also bounded by UINT16_MAX — confirm that is the
    * intended range for 256B tables.
    */
   if (offset % 32 != 0 || offset >= UINT16_MAX) {
      fprintf(ctx->fp, " invalid binding table pointer\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, bt_pool_base + offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, " binding table unavailable\n");
      return;
   }

   const uint32_t *pointers = bind_bo.map;
   for (int i = 0; i < count; i++) {
      /* Zero entries are unused slots. */
      if (pointers[i] == 0)
         continue;

      /* Surface state pointers are relative to Surface State Base Address. */
      uint64_t addr = ctx->surface_base + pointers[i];
      struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
      uint32_t size = strct->dw_length * 4;

      if (pointers[i] % 32 != 0 ||
          addr < bo.addr || addr + size >= bo.addr + bo.size) {
         fprintf(ctx->fp, "pointer %u: 0x%08x <not valid>\n", i, pointers[i]);
         continue;
      }

      fprintf(ctx->fp, "pointer %u: 0x%08x\n", i, pointers[i]);
      ctx_print_group(ctx, strct, addr, bo.map + (addr - bo.addr));
   }
}
328
329 static void
dump_samplers(struct intel_batch_decode_ctx * ctx,uint32_t offset,int count)330 dump_samplers(struct intel_batch_decode_ctx *ctx, uint32_t offset, int count)
331 {
332 struct intel_group *strct = intel_spec_find_struct(ctx->spec, "SAMPLER_STATE");
333 uint64_t state_addr = ctx->dynamic_base + offset;
334
335 assert(count > 0);
336
337 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
338 const void *state_map = bo.map;
339
340 if (state_map == NULL) {
341 fprintf(ctx->fp, " samplers unavailable\n");
342 return;
343 }
344
345 if (offset % 32 != 0) {
346 fprintf(ctx->fp, " invalid sampler state pointer\n");
347 return;
348 }
349
350 const unsigned sampler_state_size = strct->dw_length * 4;
351
352 if (count * sampler_state_size >= bo.size) {
353 fprintf(ctx->fp, " sampler state ends after bo ends\n");
354 assert(!"sampler state ends after bo ends");
355 return;
356 }
357
358 for (int i = 0; i < count; i++) {
359 fprintf(ctx->fp, "sampler state %d\n", i);
360 ctx_print_group(ctx, strct, state_addr, state_map);
361 state_addr += sampler_state_size;
362 state_map += sampler_state_size;
363 }
364 }
365
366 static void
handle_interface_descriptor_data(struct intel_batch_decode_ctx * ctx,struct intel_group * desc,const uint32_t * p)367 handle_interface_descriptor_data(struct intel_batch_decode_ctx *ctx,
368 struct intel_group *desc, const uint32_t *p)
369 {
370 uint64_t ksp = 0;
371 uint32_t sampler_offset = 0, sampler_count = 0;
372 uint32_t binding_table_offset = 0, binding_entry_count = 0;
373
374 struct intel_field_iterator iter;
375 intel_field_iterator_init(&iter, desc, p, 0, false);
376 while (intel_field_iterator_next(&iter)) {
377 if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
378 ksp = strtoll(iter.value, NULL, 16);
379 } else if (strcmp(iter.name, "Sampler State Pointer") == 0) {
380 sampler_offset = strtol(iter.value, NULL, 16);
381 } else if (strcmp(iter.name, "Sampler Count") == 0) {
382 sampler_count = strtol(iter.value, NULL, 10);
383 } else if (strcmp(iter.name, "Binding Table Pointer") == 0) {
384 binding_table_offset = strtol(iter.value, NULL, 16);
385 } else if (strcmp(iter.name, "Binding Table Entry Count") == 0) {
386 binding_entry_count = strtol(iter.value, NULL, 10);
387 }
388 }
389
390 ctx_disassemble_program(ctx, ksp, "compute shader");
391 fprintf(ctx->fp, "\n");
392
393 if (sampler_count)
394 dump_samplers(ctx, sampler_offset, sampler_count);
395 if (binding_entry_count)
396 dump_binding_table(ctx, binding_table_offset, binding_entry_count);
397 }
398
399 static void
handle_media_interface_descriptor_load(struct intel_batch_decode_ctx * ctx,const uint32_t * p)400 handle_media_interface_descriptor_load(struct intel_batch_decode_ctx *ctx,
401 const uint32_t *p)
402 {
403 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
404 struct intel_group *desc =
405 intel_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
406
407 struct intel_field_iterator iter;
408 intel_field_iterator_init(&iter, inst, p, 0, false);
409 uint32_t descriptor_offset = 0;
410 int descriptor_count = 0;
411 while (intel_field_iterator_next(&iter)) {
412 if (strcmp(iter.name, "Interface Descriptor Data Start Address") == 0) {
413 descriptor_offset = strtol(iter.value, NULL, 16);
414 } else if (strcmp(iter.name, "Interface Descriptor Total Length") == 0) {
415 descriptor_count =
416 strtol(iter.value, NULL, 16) / (desc->dw_length * 4);
417 }
418 }
419
420 uint64_t desc_addr = ctx->dynamic_base + descriptor_offset;
421 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, desc_addr);
422 const void *desc_map = bo.map;
423
424 if (desc_map == NULL) {
425 fprintf(ctx->fp, " interface descriptors unavailable\n");
426 return;
427 }
428
429 for (int i = 0; i < descriptor_count; i++) {
430 fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset);
431
432 ctx_print_group(ctx, desc, desc_addr, desc_map);
433
434 handle_interface_descriptor_data(ctx, desc, desc_map);
435
436 desc_map += desc->dw_length;
437 desc_addr += desc->dw_length * 4;
438 }
439 }
440
441 static void
handle_compute_walker(struct intel_batch_decode_ctx * ctx,const uint32_t * p)442 handle_compute_walker(struct intel_batch_decode_ctx *ctx,
443 const uint32_t *p)
444 {
445 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
446
447 struct intel_field_iterator iter;
448 intel_field_iterator_init(&iter, inst, p, 0, false);
449 while (intel_field_iterator_next(&iter)) {
450 if (strcmp(iter.name, "Interface Descriptor") == 0) {
451 handle_interface_descriptor_data(ctx, iter.struct_desc,
452 &iter.p[iter.start_bit / 32]);
453 }
454 }
455 }
456
/* Decode 3DSTATE_VERTEX_BUFFERS and dump each referenced vertex buffer.
 *
 * Each embedded VERTEX_BUFFER_STATE element is parsed field-by-field; a
 * buffer is dumped once its "Buffer Size" (or, on packets that use it,
 * "End Address") field has been seen, which arrives after the index, pitch,
 * and starting address fields it depends on.
 */
static void
handle_3dstate_vertex_buffers(struct intel_batch_decode_ctx *ctx,
                              const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
   struct intel_group *vbs = intel_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE");

   struct intel_batch_decode_bo vb = {};
   uint32_t vb_size = 0;
   int index = -1;
   int pitch = -1;
   bool ready = false;

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      /* Skip the packet's own header fields; only recurse into the
       * VERTEX_BUFFER_STATE elements.
       */
      if (iter.struct_desc != vbs)
         continue;

      struct intel_field_iterator vbs_iter;
      intel_field_iterator_init(&vbs_iter, vbs, &iter.p[iter.start_bit / 32], 0, false);
      while (intel_field_iterator_next(&vbs_iter)) {
         if (strcmp(vbs_iter.name, "Vertex Buffer Index") == 0) {
            index = vbs_iter.raw_value;
         } else if (strcmp(vbs_iter.name, "Buffer Pitch") == 0) {
            pitch = vbs_iter.raw_value;
         } else if (strcmp(vbs_iter.name, "Buffer Starting Address") == 0) {
            vb = ctx_get_bo(ctx, true, vbs_iter.raw_value);
         } else if (strcmp(vbs_iter.name, "Buffer Size") == 0) {
            vb_size = vbs_iter.raw_value;
            ready = true;
         } else if (strcmp(vbs_iter.name, "End Address") == 0) {
            /* End Address is inclusive, hence the +1 in the size. */
            if (vb.map && vbs_iter.raw_value >= vb.addr)
               vb_size = (vbs_iter.raw_value + 1) - vb.addr;
            else
               vb_size = 0;
            ready = true;
         }

         if (!ready)
            continue;

         fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size);

         /* NOTE(review): on these two 'continue' paths the per-buffer state
          * (index/pitch/ready/vb) is NOT reset, unlike the successful path
          * below — presumably harmless in practice, but confirm before
          * restructuring.
          */
         if (vb.map == NULL) {
            fprintf(ctx->fp, " buffer contents unavailable\n");
            continue;
         }

         if (vb.map == 0 || vb_size == 0)
            continue;

         ctx_print_buffer(ctx, vb, vb_size, pitch, ctx->max_vbo_decoded_lines);

         vb.map = NULL;
         vb_size = 0;
         index = -1;
         pitch = -1;
         ready = false;
      }
   }
}
519
520 static void
handle_3dstate_index_buffer(struct intel_batch_decode_ctx * ctx,const uint32_t * p)521 handle_3dstate_index_buffer(struct intel_batch_decode_ctx *ctx,
522 const uint32_t *p)
523 {
524 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
525
526 struct intel_batch_decode_bo ib = {};
527 uint32_t ib_size = 0;
528 uint32_t format = 0;
529
530 struct intel_field_iterator iter;
531 intel_field_iterator_init(&iter, inst, p, 0, false);
532 while (intel_field_iterator_next(&iter)) {
533 if (strcmp(iter.name, "Index Format") == 0) {
534 format = iter.raw_value;
535 } else if (strcmp(iter.name, "Buffer Starting Address") == 0) {
536 ib = ctx_get_bo(ctx, true, iter.raw_value);
537 } else if (strcmp(iter.name, "Buffer Size") == 0) {
538 ib_size = iter.raw_value;
539 }
540 }
541
542 if (ib.map == NULL) {
543 fprintf(ctx->fp, " buffer contents unavailable\n");
544 return;
545 }
546
547 const void *m = ib.map;
548 const void *ib_end = ib.map + MIN2(ib.size, ib_size);
549 for (int i = 0; m < ib_end && i < 10; i++) {
550 switch (format) {
551 case 0:
552 fprintf(ctx->fp, "%3d ", *(uint8_t *)m);
553 m += 1;
554 break;
555 case 1:
556 fprintf(ctx->fp, "%3d ", *(uint16_t *)m);
557 m += 2;
558 break;
559 case 2:
560 fprintf(ctx->fp, "%3d ", *(uint32_t *)m);
561 m += 4;
562 break;
563 }
564 }
565
566 if (m < ib_end)
567 fprintf(ctx->fp, "...");
568 fprintf(ctx->fp, "\n");
569 }
570
/* Decode a shader-state packet carrying a single Kernel Start Pointer and
 * disassemble the referenced shader.
 *
 * 'is_simd8' only affects the human-readable label for VS/GS packets; it is
 * seeded from the device version and then overridden by whichever dispatch
 * field the packet actually contains.
 */
static void
decode_single_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);

   uint64_t ksp = 0;
   bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gfx8+ only */
   bool is_enabled = true;

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
         ksp = iter.raw_value;
      } else if (strcmp(iter.name, "SIMD8 Dispatch Enable") == 0) {
         is_simd8 = iter.raw_value;
      } else if (strcmp(iter.name, "Dispatch Mode") == 0) {
         is_simd8 = strcmp(iter.value, "SIMD8") == 0;
      } else if (strcmp(iter.name, "Dispatch Enable") == 0) {
         is_simd8 = strcmp(iter.value, "SIMD8") == 0;
      } else if (strcmp(iter.name, "Enable") == 0) {
         is_enabled = iter.raw_value;
      }
   }

   /* NOTE(review): 'type' is NULL for packet names not listed below;
    * presumably only the listed packets are routed here — confirm, since
    * the value ends up in a "%s" format in ctx_disassemble_program().
    */
   const char *type =
      strcmp(inst->name, "VS_STATE") == 0 ? "vertex shader" :
      strcmp(inst->name, "GS_STATE") == 0 ? "geometry shader" :
      strcmp(inst->name, "SF_STATE") == 0 ? "strips and fans shader" :
      strcmp(inst->name, "CLIP_STATE") == 0 ? "clip shader" :
      strcmp(inst->name, "3DSTATE_DS") == 0 ? "tessellation evaluation shader" :
      strcmp(inst->name, "3DSTATE_HS") == 0 ? "tessellation control shader" :
      strcmp(inst->name, "3DSTATE_VS") == 0 ? (is_simd8 ? "SIMD8 vertex shader" : "vec4 vertex shader") :
      strcmp(inst->name, "3DSTATE_GS") == 0 ? (is_simd8 ? "SIMD8 geometry shader" : "vec4 geometry shader") :
      NULL;

   if (is_enabled) {
      ctx_disassemble_program(ctx, ksp, type);
      fprintf(ctx->fp, "\n");
   }
}
612
/* Decode pixel-shader kernel start pointers and disassemble each enabled
 * dispatch width.
 *
 * The packet stores up to three KSPs whose slot assignment depends on which
 * dispatch enables are set; the shuffling below normalizes ksp[] to
 * [SIMD8, SIMD16, SIMD32] order before disassembly.
 */
static void
decode_ps_kern(struct intel_batch_decode_ctx *ctx,
               struct intel_group *inst, const uint32_t *p)
{
   /* Gfx4 shares one KSP across all dispatch widths. */
   bool single_ksp = ctx->devinfo.ver == 4;
   uint64_t ksp[3] = {0, 0, 0};
   bool enabled[3] = {false, false, false};

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      /* Fields are named "Kernel Start Pointer 0/1/2"; the trailing digit
       * selects the slot.
       */
      if (strncmp(iter.name, "Kernel Start Pointer ",
                  strlen("Kernel Start Pointer ")) == 0) {
         int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
         ksp[idx] = strtol(iter.value, NULL, 16);
      } else if (strcmp(iter.name, "8 Pixel Dispatch Enable") == 0) {
         enabled[0] = strcmp(iter.value, "true") == 0;
      } else if (strcmp(iter.name, "16 Pixel Dispatch Enable") == 0) {
         enabled[1] = strcmp(iter.value, "true") == 0;
      } else if (strcmp(iter.name, "32 Pixel Dispatch Enable") == 0) {
         enabled[2] = strcmp(iter.value, "true") == 0;
      }
   }

   if (single_ksp)
      ksp[1] = ksp[2] = ksp[0];

   /* Reorder KSPs to be [8, 16, 32] instead of the hardware order. */
   if (enabled[0] + enabled[1] + enabled[2] == 1) {
      /* With exactly one width enabled, its kernel lives in slot 0. */
      if (enabled[1]) {
         ksp[1] = ksp[0];
         ksp[0] = 0;
      } else if (enabled[2]) {
         ksp[2] = ksp[0];
         ksp[0] = 0;
      }
   } else {
      /* Multiple widths: hardware stores SIMD32 in slot 1 and SIMD16 in
       * slot 2, so swap them.
       */
      uint64_t tmp = ksp[1];
      ksp[1] = ksp[2];
      ksp[2] = tmp;
   }

   if (enabled[0])
      ctx_disassemble_program(ctx, ksp[0], "SIMD8 fragment shader");
   if (enabled[1])
      ctx_disassemble_program(ctx, ksp[1], "SIMD16 fragment shader");
   if (enabled[2])
      ctx_disassemble_program(ctx, ksp[2], "SIMD32 fragment shader");

   if (enabled[0] || enabled[1] || enabled[2])
      fprintf(ctx->fp, "\n");
}
665
/* Decode the pixel-shader KSPs of the packet at 'p' (resolves the packet
 * description, then shares decode_ps_kern()).
 */
static void
decode_ps_kernels(struct intel_batch_decode_ctx *ctx,
                  const uint32_t *p)
{
   decode_ps_kern(ctx, intel_ctx_find_instruction(ctx, p), p);
}
673
674 static void
decode_3dstate_constant_all(struct intel_batch_decode_ctx * ctx,const uint32_t * p)675 decode_3dstate_constant_all(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
676 {
677 struct intel_group *inst =
678 intel_spec_find_instruction(ctx->spec, ctx->engine, p);
679 struct intel_group *body =
680 intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_ALL_DATA");
681
682 uint32_t read_length[4];
683 struct intel_batch_decode_bo buffer[4];
684 memset(buffer, 0, sizeof(buffer));
685
686 struct intel_field_iterator outer;
687 intel_field_iterator_init(&outer, inst, p, 0, false);
688 int idx = 0;
689 while (intel_field_iterator_next(&outer)) {
690 if (outer.struct_desc != body)
691 continue;
692
693 struct intel_field_iterator iter;
694 intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
695 0, false);
696 while (intel_field_iterator_next(&iter)) {
697 if (!strcmp(iter.name, "Pointer To Constant Buffer")) {
698 buffer[idx] = ctx_get_bo(ctx, true, iter.raw_value);
699 } else if (!strcmp(iter.name, "Constant Buffer Read Length")) {
700 read_length[idx] = iter.raw_value;
701 }
702 }
703 idx++;
704 }
705
706 for (int i = 0; i < 4; i++) {
707 if (read_length[i] == 0 || buffer[i].map == NULL)
708 continue;
709
710 unsigned size = read_length[i] * 32;
711 fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
712
713 ctx_print_buffer(ctx, buffer[i], size, 0, -1);
714 }
715 }
716
/* Decode 3DSTATE_CONSTANT_(VS|HS|DS|GS|PS) and dump each push-constant
 * range.
 *
 * The packet embeds a 3DSTATE_CONSTANT_BODY struct holding up to four
 * (address, read length) pairs; read lengths are in 256-bit (32-byte)
 * units.  Note the dump loop is nested inside the outer iterator loop, so
 * the buffers are printed once per body struct encountered.
 */
static void
decode_3dstate_constant(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
   struct intel_group *body =
      intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");

   /* read_addr[] slots are only consulted when the matching read_length[]
    * is non-zero, so the address array does not need initializing.
    */
   uint32_t read_length[4] = {0};
   uint64_t read_addr[4];

   struct intel_field_iterator outer;
   intel_field_iterator_init(&outer, inst, p, 0, false);
   while (intel_field_iterator_next(&outer)) {
      if (outer.struct_desc != body)
         continue;

      struct intel_field_iterator iter;
      intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
                                0, false);

      while (intel_field_iterator_next(&iter)) {
         int idx;
         if (sscanf(iter.name, "Read Length[%d]", &idx) == 1) {
            read_length[idx] = iter.raw_value;
         } else if (sscanf(iter.name, "Buffer[%d]", &idx) == 1) {
            read_addr[idx] = iter.raw_value;
         }
      }

      for (int i = 0; i < 4; i++) {
         if (read_length[i] == 0)
            continue;

         struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr[i]);
         if (!buffer.map) {
            fprintf(ctx->fp, "constant buffer %d unavailable\n", i);
            continue;
         }

         unsigned size = read_length[i] * 32;
         fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);

         ctx_print_buffer(ctx, buffer, size, 0, -1);
      }
   }
}
763
764 static void
decode_gfx4_constant_buffer(struct intel_batch_decode_ctx * ctx,const uint32_t * p)765 decode_gfx4_constant_buffer(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
766 {
767 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
768 uint64_t read_length = 0, read_addr = 0, valid = 0;
769 struct intel_field_iterator iter;
770 intel_field_iterator_init(&iter, inst, p, 0, false);
771
772 while (intel_field_iterator_next(&iter)) {
773 if (!strcmp(iter.name, "Buffer Length")) {
774 read_length = iter.raw_value;
775 } else if (!strcmp(iter.name, "Valid")) {
776 valid = iter.raw_value;
777 } else if (!strcmp(iter.name, "Buffer Starting Address")) {
778 read_addr = iter.raw_value;
779 }
780 }
781
782 if (!valid)
783 return;
784
785 struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr);
786 if (!buffer.map) {
787 fprintf(ctx->fp, "constant buffer unavailable\n");
788 return;
789 }
790 unsigned size = (read_length + 1) * 16 * sizeof(float);
791 fprintf(ctx->fp, "constant buffer size %u\n", size);
792
793 ctx_print_buffer(ctx, buffer, size, 0, -1);
794 }
795
796
797 static void
decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx * ctx,const uint32_t * p)798 decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
799 const uint32_t *p)
800 {
801 fprintf(ctx->fp, "VS Binding Table:\n");
802 dump_binding_table(ctx, p[1], -1);
803
804 fprintf(ctx->fp, "GS Binding Table:\n");
805 dump_binding_table(ctx, p[2], -1);
806
807 if (ctx->devinfo.ver < 6) {
808 fprintf(ctx->fp, "CLIP Binding Table:\n");
809 dump_binding_table(ctx, p[3], -1);
810 fprintf(ctx->fp, "SF Binding Table:\n");
811 dump_binding_table(ctx, p[4], -1);
812 fprintf(ctx->fp, "PS Binding Table:\n");
813 dump_binding_table(ctx, p[5], -1);
814 } else {
815 fprintf(ctx->fp, "PS Binding Table:\n");
816 dump_binding_table(ctx, p[3], -1);
817 }
818 }
819
/* Decode a per-stage 3DSTATE_BINDING_TABLE_POINTERS_* packet: DWord 1
 * holds the table offset; the entry count is unknown (-1 lets
 * dump_binding_table() query or guess it).
 */
static void
decode_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   dump_binding_table(ctx, p[1], -1);
}
826
/* Decode a per-stage 3DSTATE_SAMPLER_STATE_POINTERS_* packet: dump one
 * sampler at the pointer in DWord 1.
 */
static void
decode_3dstate_sampler_state_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   dump_samplers(ctx, p[1], 1);
}
833
/* Decode the Gfx6 combined 3DSTATE_SAMPLER_STATE_POINTERS packet: DWords
 * 1-3 hold one sampler pointer each (VS/GS/PS).
 */
static void
decode_3dstate_sampler_state_pointers_gfx6(struct intel_batch_decode_ctx *ctx,
                                           const uint32_t *p)
{
   for (int dw = 1; dw <= 3; dw++)
      dump_samplers(ctx, p[dw], 1);
}
842
/* Return true when 'str' ends with the suffix 'end'. */
static bool
str_ends_with(const char *str, const char *end)
{
   const size_t str_len = strlen(str);
   const size_t end_len = strlen(end);

   /* Compare the size_t lengths directly: the previous
    * "int offset = strlen(str) - strlen(end)" wrapped the unsigned
    * subtraction when 'end' was longer and then relied on
    * implementation-defined conversion to int for the < 0 test.
    */
   if (end_len > str_len)
      return false;

   return strcmp(str + (str_len - end_len), end) == 0;
}
852
853 static void
decode_dynamic_state(struct intel_batch_decode_ctx * ctx,const char * struct_type,uint32_t state_offset,int count)854 decode_dynamic_state(struct intel_batch_decode_ctx *ctx,
855 const char *struct_type, uint32_t state_offset,
856 int count)
857 {
858 uint64_t state_addr = ctx->dynamic_base + state_offset;
859 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
860 const void *state_map = bo.map;
861
862 if (state_map == NULL) {
863 fprintf(ctx->fp, " dynamic %s state unavailable\n", struct_type);
864 return;
865 }
866
867 struct intel_group *state = intel_spec_find_struct(ctx->spec, struct_type);
868 if (strcmp(struct_type, "BLEND_STATE") == 0) {
869 /* Blend states are different from the others because they have a header
870 * struct called BLEND_STATE which is followed by a variable number of
871 * BLEND_STATE_ENTRY structs.
872 */
873 fprintf(ctx->fp, "%s\n", struct_type);
874 ctx_print_group(ctx, state, state_addr, state_map);
875
876 state_addr += state->dw_length * 4;
877 state_map += state->dw_length * 4;
878
879 struct_type = "BLEND_STATE_ENTRY";
880 state = intel_spec_find_struct(ctx->spec, struct_type);
881 }
882
883 count = update_count(ctx, ctx->dynamic_base + state_offset,
884 ctx->dynamic_base, state->dw_length, count);
885
886 for (int i = 0; i < count; i++) {
887 fprintf(ctx->fp, "%s %d\n", struct_type, i);
888 ctx_print_group(ctx, state, state_addr, state_map);
889
890 state_addr += state->dw_length * 4;
891 state_map += state->dw_length * 4;
892 }
893 }
894
895 static void
decode_dynamic_state_pointers(struct intel_batch_decode_ctx * ctx,const char * struct_type,const uint32_t * p,int count)896 decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx,
897 const char *struct_type, const uint32_t *p,
898 int count)
899 {
900 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
901
902 uint32_t state_offset = 0;
903
904 struct intel_field_iterator iter;
905 intel_field_iterator_init(&iter, inst, p, 0, false);
906 while (intel_field_iterator_next(&iter)) {
907 if (str_ends_with(iter.name, "Pointer") || !strncmp(iter.name, "Pointer", 7)) {
908 state_offset = iter.raw_value;
909 break;
910 }
911 }
912 decode_dynamic_state(ctx, struct_type, state_offset, count);
913 }
914
/* Decode the Gfx6 3DSTATE_VIEWPORT_STATE_POINTERS packet, dumping each
 * viewport struct whose "... State Change" bit is set.
 *
 * Relies on the iterator presenting the change bits before the matching
 * pointer fields, so the clip/sf/cc flags are latched by the time each
 * "Pointer to ..." field is reached.
 */
static void
decode_3dstate_viewport_state_pointers(struct intel_batch_decode_ctx *ctx,
                                       const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
   uint32_t state_offset = 0;
   bool clip = false, sf = false, cc = false;
   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      /* The three "State Change" checks are plain ifs (only the last one
       * anchors the else-if chain); field names are distinct, so at most
       * one branch runs per field either way.
       */
      if (!strcmp(iter.name, "CLIP Viewport State Change"))
         clip = iter.raw_value;
      if (!strcmp(iter.name, "SF Viewport State Change"))
         sf = iter.raw_value;
      if (!strcmp(iter.name, "CC Viewport State Change"))
         cc = iter.raw_value;
      else if (!strcmp(iter.name, "Pointer to CLIP_VIEWPORT") && clip) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "CLIP_VIEWPORT", state_offset, 1);
      }
      else if (!strcmp(iter.name, "Pointer to SF_VIEWPORT") && sf) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "SF_VIEWPORT", state_offset, 1);
      }
      else if (!strcmp(iter.name, "Pointer to CC_VIEWPORT") && cc) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "CC_VIEWPORT", state_offset, 1);
      }
   }
}
945
static void
decode_3dstate_viewport_state_pointers_cc(struct intel_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   /* 3DSTATE_VIEWPORT_STATE_POINTERS_CC: decode 4 consecutive CC_VIEWPORT
    * structs at the pointer carried in this packet.
    */
   decode_dynamic_state_pointers(ctx, "CC_VIEWPORT", p, 4);
}
952
static void
decode_3dstate_viewport_state_pointers_sf_clip(struct intel_batch_decode_ctx *ctx,
                                               const uint32_t *p)
{
   /* 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP: decode 4 consecutive
    * SF_CLIP_VIEWPORT structs at the pointer carried in this packet.
    */
   decode_dynamic_state_pointers(ctx, "SF_CLIP_VIEWPORT", p, 4);
}
959
static void
decode_3dstate_blend_state_pointers(struct intel_batch_decode_ctx *ctx,
                                    const uint32_t *p)
{
   /* 3DSTATE_BLEND_STATE_POINTERS: decode the BLEND_STATE struct at the
    * pointer carried in this packet (the trailing BLEND_STATE_ENTRY array
    * is handled inside decode_dynamic_state).
    */
   decode_dynamic_state_pointers(ctx, "BLEND_STATE", p, 1);
}
966
967 static void
decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx * ctx,const uint32_t * p)968 decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx,
969 const uint32_t *p)
970 {
971 if (ctx->devinfo.ver != 6) {
972 decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1);
973 return;
974 }
975
976 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
977
978 uint32_t state_offset = 0;
979 bool blend_change = false, ds_change = false, cc_change = false;
980 struct intel_field_iterator iter;
981 intel_field_iterator_init(&iter, inst, p, 0, false);
982 while (intel_field_iterator_next(&iter)) {
983 if (!strcmp(iter.name, "BLEND_STATE Change"))
984 blend_change = iter.raw_value;
985 else if (!strcmp(iter.name, "DEPTH_STENCIL_STATE Change"))
986 ds_change = iter.raw_value;
987 else if (!strcmp(iter.name, "Color Calc State Pointer Valid"))
988 cc_change = iter.raw_value;
989 else if (!strcmp(iter.name, "Pointer to DEPTH_STENCIL_STATE") && ds_change) {
990 state_offset = iter.raw_value;
991 decode_dynamic_state(ctx, "DEPTH_STENCIL_STATE", state_offset, 1);
992 }
993 else if (!strcmp(iter.name, "Pointer to BLEND_STATE") && blend_change) {
994 state_offset = iter.raw_value;
995 decode_dynamic_state(ctx, "BLEND_STATE", state_offset, 1);
996 }
997 else if (!strcmp(iter.name, "Color Calc State Pointer") && cc_change) {
998 state_offset = iter.raw_value;
999 decode_dynamic_state(ctx, "COLOR_CALC_STATE", state_offset, 1);
1000 }
1001 }
1002 }
1003
static void
decode_3dstate_ds_state_pointers(struct intel_batch_decode_ctx *ctx,
                                 const uint32_t *p)
{
   /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS: decode the DEPTH_STENCIL_STATE
    * struct at the pointer carried in this packet.
    */
   decode_dynamic_state_pointers(ctx, "DEPTH_STENCIL_STATE", p, 1);
}
1010
static void
decode_3dstate_scissor_state_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   /* 3DSTATE_SCISSOR_STATE_POINTERS: decode the SCISSOR_RECT struct at the
    * pointer carried in this packet.
    */
   decode_dynamic_state_pointers(ctx, "SCISSOR_RECT", p, 1);
}
1017
static void
decode_3dstate_slice_table_state_pointers(struct intel_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   /* 3DSTATE_SLICE_TABLE_STATE_POINTERS: decode the SLICE_HASH_TABLE
    * struct at the pointer carried in this packet.
    */
   decode_dynamic_state_pointers(ctx, "SLICE_HASH_TABLE", p, 1);
}
1024
1025 static void
handle_gt_mode(struct intel_batch_decode_ctx * ctx,uint32_t reg_addr,uint32_t val)1026 handle_gt_mode(struct intel_batch_decode_ctx *ctx,
1027 uint32_t reg_addr, uint32_t val)
1028 {
1029 struct intel_group *reg = intel_spec_find_register(ctx->spec, reg_addr);
1030
1031 assert(intel_group_get_length(reg, &val) == 1);
1032
1033 struct intel_field_iterator iter;
1034 intel_field_iterator_init(&iter, reg, &val, 0, false);
1035
1036 uint32_t bt_alignment;
1037 bool bt_alignment_mask = 0;
1038
1039 while (intel_field_iterator_next(&iter)) {
1040 if (strcmp(iter.name, "Binding Table Alignment") == 0) {
1041 bt_alignment = iter.raw_value;
1042 } else if (strcmp(iter.name, "Binding Table Alignment Mask") == 0) {
1043 bt_alignment_mask = iter.raw_value;
1044 }
1045 }
1046
1047 if (bt_alignment_mask)
1048 ctx->use_256B_binding_tables = bt_alignment;
1049 }
1050
/* Registers whose MI_LOAD_REGISTER_IMM writes the decoder must observe in
 * order to keep its own decoding state in sync with the hardware's.
 */
struct reg_handler {
   const char *name;   /* register name as it appears in the spec */
   void (*handler)(struct intel_batch_decode_ctx *ctx,
                   uint32_t reg_addr, uint32_t val);
} reg_handlers[] = {
   { "GT_MODE", handle_gt_mode }
};
1058
1059 static void
decode_load_register_imm(struct intel_batch_decode_ctx * ctx,const uint32_t * p)1060 decode_load_register_imm(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1061 {
1062 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1063 const unsigned length = intel_group_get_length(inst, p);
1064 assert(length & 1);
1065 const unsigned nr_regs = (length - 1) / 2;
1066
1067 for (unsigned i = 0; i < nr_regs; i++) {
1068 struct intel_group *reg = intel_spec_find_register(ctx->spec, p[i * 2 + 1]);
1069 if (reg != NULL) {
1070 fprintf(ctx->fp, "register %s (0x%x): 0x%x\n",
1071 reg->name, reg->register_offset, p[2]);
1072 ctx_print_group(ctx, reg, reg->register_offset, &p[2]);
1073
1074 for (unsigned i = 0; i < ARRAY_SIZE(reg_handlers); i++) {
1075 if (strcmp(reg->name, reg_handlers[i].name) == 0)
1076 reg_handlers[i].handler(ctx, p[1], p[2]);
1077 }
1078 }
1079 }
1080 }
1081
1082 static void
decode_vs_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1083 decode_vs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1084 {
1085 struct intel_group *strct =
1086 intel_spec_find_struct(ctx->spec, "VS_STATE");
1087 if (strct == NULL) {
1088 fprintf(ctx->fp, "did not find VS_STATE info\n");
1089 return;
1090 }
1091
1092 struct intel_batch_decode_bo bind_bo =
1093 ctx_get_bo(ctx, true, offset);
1094
1095 if (bind_bo.map == NULL) {
1096 fprintf(ctx->fp, " vs state unavailable\n");
1097 return;
1098 }
1099
1100 ctx_print_group(ctx, strct, offset, bind_bo.map);
1101
1102 uint64_t ksp = 0;
1103 bool is_enabled = true;
1104 struct intel_field_iterator iter;
1105 intel_field_iterator_init(&iter, strct, bind_bo.map, 0, false);
1106 while (intel_field_iterator_next(&iter)) {
1107 if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
1108 ksp = iter.raw_value;
1109 } else if (strcmp(iter.name, "Enable") == 0) {
1110 is_enabled = iter.raw_value;
1111 }
1112 }
1113 if (is_enabled) {
1114 ctx_disassemble_program(ctx, ksp, "vertex shader");
1115 fprintf(ctx->fp, "\n");
1116 }
1117 }
1118
1119 static void
decode_gs_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1120 decode_gs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1121 {
1122 struct intel_group *strct =
1123 intel_spec_find_struct(ctx->spec, "GS_STATE");
1124 if (strct == NULL) {
1125 fprintf(ctx->fp, "did not find GS_STATE info\n");
1126 return;
1127 }
1128
1129 struct intel_batch_decode_bo bind_bo =
1130 ctx_get_bo(ctx, true, offset);
1131
1132 if (bind_bo.map == NULL) {
1133 fprintf(ctx->fp, " gs state unavailable\n");
1134 return;
1135 }
1136
1137 ctx_print_group(ctx, strct, offset, bind_bo.map);
1138 }
1139
1140 static void
decode_clip_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1141 decode_clip_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1142 {
1143 struct intel_group *strct =
1144 intel_spec_find_struct(ctx->spec, "CLIP_STATE");
1145 if (strct == NULL) {
1146 fprintf(ctx->fp, "did not find CLIP_STATE info\n");
1147 return;
1148 }
1149
1150 struct intel_batch_decode_bo bind_bo =
1151 ctx_get_bo(ctx, true, offset);
1152
1153 if (bind_bo.map == NULL) {
1154 fprintf(ctx->fp, " clip state unavailable\n");
1155 return;
1156 }
1157
1158 ctx_print_group(ctx, strct, offset, bind_bo.map);
1159
1160 struct intel_group *vp_strct =
1161 intel_spec_find_struct(ctx->spec, "CLIP_VIEWPORT");
1162 if (vp_strct == NULL) {
1163 fprintf(ctx->fp, "did not find CLIP_VIEWPORT info\n");
1164 return;
1165 }
1166 uint32_t clip_vp_offset = ((uint32_t *)bind_bo.map)[6] & ~0x3;
1167 struct intel_batch_decode_bo vp_bo =
1168 ctx_get_bo(ctx, true, clip_vp_offset);
1169 if (vp_bo.map == NULL) {
1170 fprintf(ctx->fp, " clip vp state unavailable\n");
1171 return;
1172 }
1173 ctx_print_group(ctx, vp_strct, clip_vp_offset, vp_bo.map);
1174 }
1175
1176 static void
decode_sf_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1177 decode_sf_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1178 {
1179 struct intel_group *strct =
1180 intel_spec_find_struct(ctx->spec, "SF_STATE");
1181 if (strct == NULL) {
1182 fprintf(ctx->fp, "did not find SF_STATE info\n");
1183 return;
1184 }
1185
1186 struct intel_batch_decode_bo bind_bo =
1187 ctx_get_bo(ctx, true, offset);
1188
1189 if (bind_bo.map == NULL) {
1190 fprintf(ctx->fp, " sf state unavailable\n");
1191 return;
1192 }
1193
1194 ctx_print_group(ctx, strct, offset, bind_bo.map);
1195
1196 struct intel_group *vp_strct =
1197 intel_spec_find_struct(ctx->spec, "SF_VIEWPORT");
1198 if (vp_strct == NULL) {
1199 fprintf(ctx->fp, "did not find SF_VIEWPORT info\n");
1200 return;
1201 }
1202
1203 uint32_t sf_vp_offset = ((uint32_t *)bind_bo.map)[5] & ~0x3;
1204 struct intel_batch_decode_bo vp_bo =
1205 ctx_get_bo(ctx, true, sf_vp_offset);
1206 if (vp_bo.map == NULL) {
1207 fprintf(ctx->fp, " sf vp state unavailable\n");
1208 return;
1209 }
1210 ctx_print_group(ctx, vp_strct, sf_vp_offset, vp_bo.map);
1211 }
1212
1213 static void
decode_wm_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1214 decode_wm_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1215 {
1216 struct intel_group *strct =
1217 intel_spec_find_struct(ctx->spec, "WM_STATE");
1218 if (strct == NULL) {
1219 fprintf(ctx->fp, "did not find WM_STATE info\n");
1220 return;
1221 }
1222
1223 struct intel_batch_decode_bo bind_bo =
1224 ctx_get_bo(ctx, true, offset);
1225
1226 if (bind_bo.map == NULL) {
1227 fprintf(ctx->fp, " wm state unavailable\n");
1228 return;
1229 }
1230
1231 ctx_print_group(ctx, strct, offset, bind_bo.map);
1232
1233 decode_ps_kern(ctx, strct, bind_bo.map);
1234 }
1235
1236 static void
decode_cc_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1237 decode_cc_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1238 {
1239 struct intel_group *strct =
1240 intel_spec_find_struct(ctx->spec, "COLOR_CALC_STATE");
1241 if (strct == NULL) {
1242 fprintf(ctx->fp, "did not find COLOR_CALC_STATE info\n");
1243 return;
1244 }
1245
1246 struct intel_batch_decode_bo bind_bo =
1247 ctx_get_bo(ctx, true, offset);
1248
1249 if (bind_bo.map == NULL) {
1250 fprintf(ctx->fp, " cc state unavailable\n");
1251 return;
1252 }
1253
1254 ctx_print_group(ctx, strct, offset, bind_bo.map);
1255
1256 struct intel_group *vp_strct =
1257 intel_spec_find_struct(ctx->spec, "CC_VIEWPORT");
1258 if (vp_strct == NULL) {
1259 fprintf(ctx->fp, "did not find CC_VIEWPORT info\n");
1260 return;
1261 }
1262 uint32_t cc_vp_offset = ((uint32_t *)bind_bo.map)[4] & ~0x3;
1263 struct intel_batch_decode_bo vp_bo =
1264 ctx_get_bo(ctx, true, cc_vp_offset);
1265 if (vp_bo.map == NULL) {
1266 fprintf(ctx->fp, " cc vp state unavailable\n");
1267 return;
1268 }
1269 ctx_print_group(ctx, vp_strct, cc_vp_offset, vp_bo.map);
1270 }
1271 static void
decode_pipelined_pointers(struct intel_batch_decode_ctx * ctx,const uint32_t * p)1272 decode_pipelined_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1273 {
1274 fprintf(ctx->fp, "VS State Table:\n");
1275 decode_vs_state(ctx, p[1]);
1276 if (p[2] & 1) {
1277 fprintf(ctx->fp, "GS State Table:\n");
1278 decode_gs_state(ctx, p[2] & ~1);
1279 }
1280 fprintf(ctx->fp, "Clip State Table:\n");
1281 decode_clip_state(ctx, p[3] & ~1);
1282 fprintf(ctx->fp, "SF State Table:\n");
1283 decode_sf_state(ctx, p[4]);
1284 fprintf(ctx->fp, "WM State Table:\n");
1285 decode_wm_state(ctx, p[5]);
1286 fprintf(ctx->fp, "CC State Table:\n");
1287 decode_cc_state(ctx, p[6]);
1288 }
1289
static void
decode_cps_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   /* 3DSTATE_CPS_POINTERS: decode the CPS_STATE struct at the pointer
    * carried in this packet.
    */
   decode_dynamic_state_pointers(ctx, "CPS_STATE", p, 1);
}
1295
/* Dispatch table mapping instruction names to decoders that go beyond the
 * generic field dump (following pointers, disassembling kernels, tracking
 * decoder state).  Consulted by intel_print_batch() when
 * INTEL_BATCH_DECODE_FULL is set; the first matching entry wins.
 */
struct custom_decoder {
   const char *cmd_name;
   void (*decode)(struct intel_batch_decode_ctx *ctx, const uint32_t *p);
} custom_decoders[] = {
   { "STATE_BASE_ADDRESS", handle_state_base_address },
   { "3DSTATE_BINDING_TABLE_POOL_ALLOC", handle_binding_table_pool_alloc },
   { "MEDIA_INTERFACE_DESCRIPTOR_LOAD", handle_media_interface_descriptor_load },
   { "COMPUTE_WALKER", handle_compute_walker },
   { "3DSTATE_VERTEX_BUFFERS", handle_3dstate_vertex_buffers },
   { "3DSTATE_INDEX_BUFFER", handle_3dstate_index_buffer },
   { "3DSTATE_VS", decode_single_ksp },
   { "3DSTATE_GS", decode_single_ksp },
   { "3DSTATE_DS", decode_single_ksp },
   { "3DSTATE_HS", decode_single_ksp },
   { "3DSTATE_PS", decode_ps_kernels },
   { "3DSTATE_WM", decode_ps_kernels },
   { "3DSTATE_CONSTANT_VS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_GS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_PS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_HS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_DS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_ALL", decode_3dstate_constant_all },

   { "3DSTATE_BINDING_TABLE_POINTERS", decode_gfx4_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_GS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_PS", decode_3dstate_binding_table_pointers },

   { "3DSTATE_SAMPLER_STATE_POINTERS_VS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gfx6 },

   { "3DSTATE_VIEWPORT_STATE_POINTERS", decode_3dstate_viewport_state_pointers },
   { "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc },
   { "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
   { "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers },
   { "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers },
   { "3DSTATE_DEPTH_STENCIL_STATE_POINTERS", decode_3dstate_ds_state_pointers },
   { "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers },
   { "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers },
   { "MI_LOAD_REGISTER_IMM", decode_load_register_imm },
   { "3DSTATE_PIPELINED_POINTERS", decode_pipelined_pointers },
   { "3DSTATE_CPS_POINTERS", decode_cps_pointers },
   { "CONSTANT_BUFFER", decode_gfx4_constant_buffer },
};
1346
/* Decode and print a batch buffer, one instruction at a time.  Recurses
 * into secondary batches via MI_BATCH_BUFFER_START (depth-limited via
 * ctx->n_batch_buffer_start) and stops at MI_BATCH_BUFFER_END.
 *
 * batch/batch_size: CPU mapping and size in bytes of the batch contents.
 * batch_addr:       address used for printed offsets (when
 *                   INTEL_BATCH_DECODE_OFFSETS is set).
 * from_ring:        when true, a non-second-level BB_START does not stop
 *                   decoding of the remainder of this buffer.
 */
void
intel_print_batch(struct intel_batch_decode_ctx *ctx,
                  const uint32_t *batch, uint32_t batch_size,
                  uint64_t batch_addr, bool from_ring)
{
   const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
   int length;
   struct intel_group *inst;
   const char *reset_color = ctx->flags & INTEL_BATCH_DECODE_IN_COLOR ? NORMAL : "";

   /* Guard against unbounded BB_START recursion (e.g. a batch that jumps
    * back into itself).
    */
   if (ctx->n_batch_buffer_start >= 100) {
      fprintf(ctx->fp, "%s0x%08"PRIx64": Max batch buffer jumps exceeded%s\n",
              (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
              (ctx->flags & INTEL_BATCH_DECODE_OFFSETS) ? batch_addr : 0,
              reset_color);
      return;
   }

   ctx->n_batch_buffer_start++;

   for (p = batch; p < end; p += length) {
      inst = intel_ctx_find_instruction(ctx, p);
      length = intel_group_get_length(inst, p);
      assert(inst == NULL || length > 0);
      /* For an unrecognized opcode, advance one DWord at a time so we can
       * resynchronize on the next known instruction.
       */
      length = MAX2(1, length);

      uint64_t offset;
      if (ctx->flags & INTEL_BATCH_DECODE_OFFSETS)
         offset = batch_addr + ((char *)p - (char *)batch);
      else
         offset = 0;

      if (inst == NULL) {
         /* Dump unknown DWords (in red when color is enabled). */
         fprintf(ctx->fp, "%s0x%08"PRIx64": unknown instruction %08x%s\n",
                 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
                 offset, p[0], reset_color);

         for (int i=1; i < length; i++) {
            fprintf(ctx->fp, "%s0x%08"PRIx64": -- %08x%s\n",
                    (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
                    offset + i * 4, p[i], reset_color);
         }

         continue;
      }

      const char *color;
      const char *inst_name = intel_group_get_name(inst);
      if (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) {
         reset_color = NORMAL;
         if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
            /* Batch-control instructions get a distinct header color. */
            if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 ||
                strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0)
               color = GREEN_HEADER;
            else
               color = BLUE_HEADER;
         } else {
            color = NORMAL;
         }
      } else {
         color = "";
         reset_color = "";
      }

      /* Tag the instruction the hardware head pointer (ACTHD) stopped at. */
      fprintf(ctx->fp, "%s0x%08"PRIx64"%s: 0x%08x: %-80s%s\n", color, offset,
              ctx->acthd && offset == ctx->acthd ? " (ACTHD)" : "", p[0],
              inst_name, reset_color);

      if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
         ctx_print_group(ctx, inst, offset, p);

         /* Run the instruction-specific decoder, if one is registered. */
         for (int i = 0; i < ARRAY_SIZE(custom_decoders); i++) {
            if (strcmp(inst_name, custom_decoders[i].cmd_name) == 0) {
               custom_decoders[i].decode(ctx, p);
               break;
            }
         }
      }

      if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0) {
         uint64_t next_batch_addr = 0;
         bool ppgtt = false;
         bool second_level = false;
         bool predicate = false;
         struct intel_field_iterator iter;
         intel_field_iterator_init(&iter, inst, p, 0, false);
         while (intel_field_iterator_next(&iter)) {
            if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
               next_batch_addr = iter.raw_value;
            } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
               second_level = iter.raw_value;
            } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
               ppgtt = iter.raw_value;
            } else if (strcmp(iter.name, "Predication Enable") == 0) {
               predicate = iter.raw_value;
            }
         }

         /* A predicated BB_START may be skipped by the hardware, so do not
          * follow it.
          */
         if (!predicate) {
            struct intel_batch_decode_bo next_batch = ctx_get_bo(ctx, ppgtt, next_batch_addr);

            if (next_batch.map == NULL) {
               fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
                       next_batch_addr);
            } else {
               intel_print_batch(ctx, next_batch.map, next_batch.size,
                                 next_batch.addr, false);
            }
            if (second_level) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
                * like a subroutine call. Commands that come afterwards get
                * processed once the 2nd level batch buffer returns with
                * MI_BATCH_BUFFER_END.
                */
               continue;
            } else if (!from_ring) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
                * like a goto. Nothing after it will ever get processed. In
                * order to prevent the recursion from growing, we just reset the
                * loop and continue;
                */
               break;
            }
         }
      } else if (strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0) {
         break;
      }
   }

   ctx->n_batch_buffer_start--;
}
1478