1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "common/gen_decoder.h"
25 #include "gen_disasm.h"
26 #include "util/macros.h"
27 #include "main/macros.h" /* Needed for ROUND_DOWN_TO */
28
29 #include <string.h>
30
31 void
gen_batch_decode_ctx_init(struct gen_batch_decode_ctx * ctx,const struct gen_device_info * devinfo,FILE * fp,enum gen_batch_decode_flags flags,const char * xml_path,struct gen_batch_decode_bo (* get_bo)(void *,bool,uint64_t),unsigned (* get_state_size)(void *,uint64_t,uint64_t),void * user_data)32 gen_batch_decode_ctx_init(struct gen_batch_decode_ctx *ctx,
33 const struct gen_device_info *devinfo,
34 FILE *fp, enum gen_batch_decode_flags flags,
35 const char *xml_path,
36 struct gen_batch_decode_bo (*get_bo)(void *,
37 bool,
38 uint64_t),
39 unsigned (*get_state_size)(void *, uint64_t,
40 uint64_t),
41 void *user_data)
42 {
43 memset(ctx, 0, sizeof(*ctx));
44
45 ctx->devinfo = *devinfo;
46 ctx->get_bo = get_bo;
47 ctx->get_state_size = get_state_size;
48 ctx->user_data = user_data;
49 ctx->fp = fp;
50 ctx->flags = flags;
51 ctx->max_vbo_decoded_lines = -1; /* No limit! */
52 ctx->engine = I915_ENGINE_CLASS_RENDER;
53
54 if (xml_path == NULL)
55 ctx->spec = gen_spec_load(devinfo);
56 else
57 ctx->spec = gen_spec_load_from_path(devinfo, xml_path);
58 }
59
60 void
gen_batch_decode_ctx_finish(struct gen_batch_decode_ctx * ctx)61 gen_batch_decode_ctx_finish(struct gen_batch_decode_ctx *ctx)
62 {
63 gen_spec_destroy(ctx->spec);
64 }
65
/* ANSI terminal escape sequences.  CSI is the "ESC [" introducer; the other
 * macros append a select-graphic-rendition code to it.
 *
 * ESC is spelled "\033" because the "\e" escape is a compiler extension and
 * not part of ISO C.
 */
#define CSI          "\033["
#define RED_COLOR    CSI "31m"
#define BLUE_HEADER  CSI "0;44m"
#define GREEN_HEADER CSI "1;42m"
#define NORMAL       CSI "0m"
71
72 static void
ctx_print_group(struct gen_batch_decode_ctx * ctx,struct gen_group * group,uint64_t address,const void * map)73 ctx_print_group(struct gen_batch_decode_ctx *ctx,
74 struct gen_group *group,
75 uint64_t address, const void *map)
76 {
77 gen_print_group(ctx->fp, group, address, map, 0,
78 (ctx->flags & GEN_BATCH_DECODE_IN_COLOR) != 0);
79 }
80
81 static struct gen_batch_decode_bo
ctx_get_bo(struct gen_batch_decode_ctx * ctx,bool ppgtt,uint64_t addr)82 ctx_get_bo(struct gen_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr)
83 {
84 if (gen_spec_get_gen(ctx->spec) >= gen_make_gen(8,0)) {
85 /* On Broadwell and above, we have 48-bit addresses which consume two
86 * dwords. Some packets require that these get stored in a "canonical
87 * form" which means that bit 47 is sign-extended through the upper
88 * bits. In order to correctly handle those aub dumps, we need to mask
89 * off the top 16 bits.
90 */
91 addr &= (~0ull >> 16);
92 }
93
94 struct gen_batch_decode_bo bo = ctx->get_bo(ctx->user_data, ppgtt, addr);
95
96 if (gen_spec_get_gen(ctx->spec) >= gen_make_gen(8,0))
97 bo.addr &= (~0ull >> 16);
98
99 /* We may actually have an offset into the bo */
100 if (bo.map != NULL) {
101 assert(bo.addr <= addr);
102 uint64_t offset = addr - bo.addr;
103 bo.map += offset;
104 bo.addr += offset;
105 bo.size -= offset;
106 }
107
108 return bo;
109 }
110
111 static int
update_count(struct gen_batch_decode_ctx * ctx,uint64_t address,uint64_t base_address,unsigned element_dwords,unsigned guess)112 update_count(struct gen_batch_decode_ctx *ctx,
113 uint64_t address,
114 uint64_t base_address,
115 unsigned element_dwords,
116 unsigned guess)
117 {
118 unsigned size = 0;
119
120 if (ctx->get_state_size)
121 size = ctx->get_state_size(ctx->user_data, address, base_address);
122
123 if (size > 0)
124 return size / (sizeof(uint32_t) * element_dwords);
125
126 /* In the absence of any information, just guess arbitrarily. */
127 return guess;
128 }
129
130 static void
ctx_disassemble_program(struct gen_batch_decode_ctx * ctx,uint32_t ksp,const char * type)131 ctx_disassemble_program(struct gen_batch_decode_ctx *ctx,
132 uint32_t ksp, const char *type)
133 {
134 uint64_t addr = ctx->instruction_base + ksp;
135 struct gen_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
136 if (!bo.map)
137 return;
138
139 fprintf(ctx->fp, "\nReferenced %s:\n", type);
140 gen_disassemble(&ctx->devinfo, bo.map, 0, ctx->fp);
141 }
142
/* Heuristic guess at whether a raw 32-bit word is really a float
 * (http://stackoverflow.com/a/2953466).
 *
 * The word is accepted when any of the following holds:
 *  - it encodes +/- 0.0,
 *  - its unbiased exponent is within [-30, 30] (about 1e-9 to 1e9),
 *  - the low 16 mantissa bits are clear (few binary digits).
 */
static bool
probably_float(uint32_t bits)
{
   const int exponent = (int)((bits >> 23) & 0xffU) - 127;
   const uint32_t mantissa = bits & 0x007fffffU;

   const bool is_zero = exponent == -127 && mantissa == 0;
   const bool sane_magnitude = exponent >= -30 && exponent <= 30;
   const bool short_mantissa = (mantissa & 0x0000ffffU) == 0;

   return is_zero || sane_magnitude || short_mantissa;
}
167
168 static void
ctx_print_buffer(struct gen_batch_decode_ctx * ctx,struct gen_batch_decode_bo bo,uint32_t read_length,uint32_t pitch,int max_lines)169 ctx_print_buffer(struct gen_batch_decode_ctx *ctx,
170 struct gen_batch_decode_bo bo,
171 uint32_t read_length,
172 uint32_t pitch,
173 int max_lines)
174 {
175 const uint32_t *dw_end =
176 bo.map + ROUND_DOWN_TO(MIN2(bo.size, read_length), 4);
177
178 int column_count = 0, pitch_col_count = 0, line_count = -1;
179 for (const uint32_t *dw = bo.map; dw < dw_end; dw++) {
180 if (pitch_col_count * 4 == pitch || column_count == 8) {
181 fprintf(ctx->fp, "\n");
182 column_count = 0;
183 if (pitch_col_count * 4 == pitch)
184 pitch_col_count = 0;
185 line_count++;
186
187 if (max_lines >= 0 && line_count >= max_lines)
188 break;
189 }
190 fprintf(ctx->fp, column_count == 0 ? " " : " ");
191
192 if ((ctx->flags & GEN_BATCH_DECODE_FLOATS) && probably_float(*dw))
193 fprintf(ctx->fp, " %8.2f", *(float *) dw);
194 else
195 fprintf(ctx->fp, " 0x%08x", *dw);
196
197 column_count++;
198 pitch_col_count++;
199 }
200 fprintf(ctx->fp, "\n");
201 }
202
203 static struct gen_group *
gen_ctx_find_instruction(struct gen_batch_decode_ctx * ctx,const uint32_t * p)204 gen_ctx_find_instruction(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
205 {
206 return gen_spec_find_instruction(ctx->spec, ctx->engine, p);
207 }
208
209 static void
handle_state_base_address(struct gen_batch_decode_ctx * ctx,const uint32_t * p)210 handle_state_base_address(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
211 {
212 struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
213
214 struct gen_field_iterator iter;
215 gen_field_iterator_init(&iter, inst, p, 0, false);
216
217 uint64_t surface_base = 0, dynamic_base = 0, instruction_base = 0;
218 bool surface_modify = 0, dynamic_modify = 0, instruction_modify = 0;
219
220 while (gen_field_iterator_next(&iter)) {
221 if (strcmp(iter.name, "Surface State Base Address") == 0) {
222 surface_base = iter.raw_value;
223 } else if (strcmp(iter.name, "Dynamic State Base Address") == 0) {
224 dynamic_base = iter.raw_value;
225 } else if (strcmp(iter.name, "Instruction Base Address") == 0) {
226 instruction_base = iter.raw_value;
227 } else if (strcmp(iter.name, "Surface State Base Address Modify Enable") == 0) {
228 surface_modify = iter.raw_value;
229 } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) {
230 dynamic_modify = iter.raw_value;
231 } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) {
232 instruction_modify = iter.raw_value;
233 }
234 }
235
236 if (dynamic_modify)
237 ctx->dynamic_base = dynamic_base;
238
239 if (surface_modify)
240 ctx->surface_base = surface_base;
241
242 if (instruction_modify)
243 ctx->instruction_base = instruction_base;
244 }
245
246 static void
dump_binding_table(struct gen_batch_decode_ctx * ctx,uint32_t offset,int count)247 dump_binding_table(struct gen_batch_decode_ctx *ctx, uint32_t offset, int count)
248 {
249 struct gen_group *strct =
250 gen_spec_find_struct(ctx->spec, "RENDER_SURFACE_STATE");
251 if (strct == NULL) {
252 fprintf(ctx->fp, "did not find RENDER_SURFACE_STATE info\n");
253 return;
254 }
255
256 if (count < 0) {
257 count = update_count(ctx, ctx->surface_base + offset,
258 ctx->surface_base, 1, 8);
259 }
260
261 if (offset % 32 != 0 || offset >= UINT16_MAX) {
262 fprintf(ctx->fp, " invalid binding table pointer\n");
263 return;
264 }
265
266 struct gen_batch_decode_bo bind_bo =
267 ctx_get_bo(ctx, true, ctx->surface_base + offset);
268
269 if (bind_bo.map == NULL) {
270 fprintf(ctx->fp, " binding table unavailable\n");
271 return;
272 }
273
274 const uint32_t *pointers = bind_bo.map;
275 for (int i = 0; i < count; i++) {
276 if (pointers[i] == 0)
277 continue;
278
279 uint64_t addr = ctx->surface_base + pointers[i];
280 struct gen_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
281 uint32_t size = strct->dw_length * 4;
282
283 if (pointers[i] % 32 != 0 ||
284 addr < bo.addr || addr + size >= bo.addr + bo.size) {
285 fprintf(ctx->fp, "pointer %u: 0x%08x <not valid>\n", i, pointers[i]);
286 continue;
287 }
288
289 fprintf(ctx->fp, "pointer %u: 0x%08x\n", i, pointers[i]);
290 ctx_print_group(ctx, strct, addr, bo.map + (addr - bo.addr));
291 }
292 }
293
294 static void
dump_samplers(struct gen_batch_decode_ctx * ctx,uint32_t offset,int count)295 dump_samplers(struct gen_batch_decode_ctx *ctx, uint32_t offset, int count)
296 {
297 struct gen_group *strct = gen_spec_find_struct(ctx->spec, "SAMPLER_STATE");
298 uint64_t state_addr = ctx->dynamic_base + offset;
299
300 if (count < 0) {
301 count = update_count(ctx, state_addr, ctx->dynamic_base,
302 strct->dw_length, 4);
303 }
304
305 struct gen_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
306 const void *state_map = bo.map;
307
308 if (state_map == NULL) {
309 fprintf(ctx->fp, " samplers unavailable\n");
310 return;
311 }
312
313 if (offset % 32 != 0 || state_addr - bo.addr >= bo.size) {
314 fprintf(ctx->fp, " invalid sampler state pointer\n");
315 return;
316 }
317
318 for (int i = 0; i < count; i++) {
319 fprintf(ctx->fp, "sampler state %d\n", i);
320 ctx_print_group(ctx, strct, state_addr, state_map);
321 state_addr += 16;
322 state_map += 16;
323 }
324 }
325
326 static void
handle_media_interface_descriptor_load(struct gen_batch_decode_ctx * ctx,const uint32_t * p)327 handle_media_interface_descriptor_load(struct gen_batch_decode_ctx *ctx,
328 const uint32_t *p)
329 {
330 struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
331 struct gen_group *desc =
332 gen_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
333
334 struct gen_field_iterator iter;
335 gen_field_iterator_init(&iter, inst, p, 0, false);
336 uint32_t descriptor_offset = 0;
337 int descriptor_count = 0;
338 while (gen_field_iterator_next(&iter)) {
339 if (strcmp(iter.name, "Interface Descriptor Data Start Address") == 0) {
340 descriptor_offset = strtol(iter.value, NULL, 16);
341 } else if (strcmp(iter.name, "Interface Descriptor Total Length") == 0) {
342 descriptor_count =
343 strtol(iter.value, NULL, 16) / (desc->dw_length * 4);
344 }
345 }
346
347 uint64_t desc_addr = ctx->dynamic_base + descriptor_offset;
348 struct gen_batch_decode_bo bo = ctx_get_bo(ctx, true, desc_addr);
349 const void *desc_map = bo.map;
350
351 if (desc_map == NULL) {
352 fprintf(ctx->fp, " interface descriptors unavailable\n");
353 return;
354 }
355
356 for (int i = 0; i < descriptor_count; i++) {
357 fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset);
358
359 ctx_print_group(ctx, desc, desc_addr, desc_map);
360
361 gen_field_iterator_init(&iter, desc, desc_map, 0, false);
362 uint64_t ksp = 0;
363 uint32_t sampler_offset = 0, sampler_count = 0;
364 uint32_t binding_table_offset = 0, binding_entry_count = 0;
365 while (gen_field_iterator_next(&iter)) {
366 if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
367 ksp = strtoll(iter.value, NULL, 16);
368 } else if (strcmp(iter.name, "Sampler State Pointer") == 0) {
369 sampler_offset = strtol(iter.value, NULL, 16);
370 } else if (strcmp(iter.name, "Sampler Count") == 0) {
371 sampler_count = strtol(iter.value, NULL, 10);
372 } else if (strcmp(iter.name, "Binding Table Pointer") == 0) {
373 binding_table_offset = strtol(iter.value, NULL, 16);
374 } else if (strcmp(iter.name, "Binding Table Entry Count") == 0) {
375 binding_entry_count = strtol(iter.value, NULL, 10);
376 }
377 }
378
379 ctx_disassemble_program(ctx, ksp, "compute shader");
380 fprintf(ctx->fp, "\n");
381
382 dump_samplers(ctx, sampler_offset, sampler_count);
383 dump_binding_table(ctx, binding_table_offset, binding_entry_count);
384
385 desc_map += desc->dw_length;
386 desc_addr += desc->dw_length * 4;
387 }
388 }
389
390 static void
handle_3dstate_vertex_buffers(struct gen_batch_decode_ctx * ctx,const uint32_t * p)391 handle_3dstate_vertex_buffers(struct gen_batch_decode_ctx *ctx,
392 const uint32_t *p)
393 {
394 struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
395 struct gen_group *vbs = gen_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE");
396
397 struct gen_batch_decode_bo vb = {};
398 uint32_t vb_size = 0;
399 int index = -1;
400 int pitch = -1;
401 bool ready = false;
402
403 struct gen_field_iterator iter;
404 gen_field_iterator_init(&iter, inst, p, 0, false);
405 while (gen_field_iterator_next(&iter)) {
406 if (iter.struct_desc != vbs)
407 continue;
408
409 struct gen_field_iterator vbs_iter;
410 gen_field_iterator_init(&vbs_iter, vbs, &iter.p[iter.start_bit / 32], 0, false);
411 while (gen_field_iterator_next(&vbs_iter)) {
412 if (strcmp(vbs_iter.name, "Vertex Buffer Index") == 0) {
413 index = vbs_iter.raw_value;
414 } else if (strcmp(vbs_iter.name, "Buffer Pitch") == 0) {
415 pitch = vbs_iter.raw_value;
416 } else if (strcmp(vbs_iter.name, "Buffer Starting Address") == 0) {
417 vb = ctx_get_bo(ctx, true, vbs_iter.raw_value);
418 } else if (strcmp(vbs_iter.name, "Buffer Size") == 0) {
419 vb_size = vbs_iter.raw_value;
420 ready = true;
421 } else if (strcmp(vbs_iter.name, "End Address") == 0) {
422 if (vb.map && vbs_iter.raw_value >= vb.addr)
423 vb_size = (vbs_iter.raw_value + 1) - vb.addr;
424 else
425 vb_size = 0;
426 ready = true;
427 }
428
429 if (!ready)
430 continue;
431
432 fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size);
433
434 if (vb.map == NULL) {
435 fprintf(ctx->fp, " buffer contents unavailable\n");
436 continue;
437 }
438
439 if (vb.map == 0 || vb_size == 0)
440 continue;
441
442 ctx_print_buffer(ctx, vb, vb_size, pitch, ctx->max_vbo_decoded_lines);
443
444 vb.map = NULL;
445 vb_size = 0;
446 index = -1;
447 pitch = -1;
448 ready = false;
449 }
450 }
451 }
452
453 static void
handle_3dstate_index_buffer(struct gen_batch_decode_ctx * ctx,const uint32_t * p)454 handle_3dstate_index_buffer(struct gen_batch_decode_ctx *ctx,
455 const uint32_t *p)
456 {
457 struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
458
459 struct gen_batch_decode_bo ib = {};
460 uint32_t ib_size = 0;
461 uint32_t format = 0;
462
463 struct gen_field_iterator iter;
464 gen_field_iterator_init(&iter, inst, p, 0, false);
465 while (gen_field_iterator_next(&iter)) {
466 if (strcmp(iter.name, "Index Format") == 0) {
467 format = iter.raw_value;
468 } else if (strcmp(iter.name, "Buffer Starting Address") == 0) {
469 ib = ctx_get_bo(ctx, true, iter.raw_value);
470 } else if (strcmp(iter.name, "Buffer Size") == 0) {
471 ib_size = iter.raw_value;
472 }
473 }
474
475 if (ib.map == NULL) {
476 fprintf(ctx->fp, " buffer contents unavailable\n");
477 return;
478 }
479
480 const void *m = ib.map;
481 const void *ib_end = ib.map + MIN2(ib.size, ib_size);
482 for (int i = 0; m < ib_end && i < 10; i++) {
483 switch (format) {
484 case 0:
485 fprintf(ctx->fp, "%3d ", *(uint8_t *)m);
486 m += 1;
487 break;
488 case 1:
489 fprintf(ctx->fp, "%3d ", *(uint16_t *)m);
490 m += 2;
491 break;
492 case 2:
493 fprintf(ctx->fp, "%3d ", *(uint32_t *)m);
494 m += 4;
495 break;
496 }
497 }
498
499 if (m < ib_end)
500 fprintf(ctx->fp, "...");
501 fprintf(ctx->fp, "\n");
502 }
503
504 static void
decode_single_ksp(struct gen_batch_decode_ctx * ctx,const uint32_t * p)505 decode_single_ksp(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
506 {
507 struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
508
509 uint64_t ksp = 0;
510 bool is_simd8 = ctx->devinfo.gen >= 11; /* vertex shaders on Gen8+ only */
511 bool is_enabled = true;
512
513 struct gen_field_iterator iter;
514 gen_field_iterator_init(&iter, inst, p, 0, false);
515 while (gen_field_iterator_next(&iter)) {
516 if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
517 ksp = iter.raw_value;
518 } else if (strcmp(iter.name, "SIMD8 Dispatch Enable") == 0) {
519 is_simd8 = iter.raw_value;
520 } else if (strcmp(iter.name, "Dispatch Mode") == 0) {
521 is_simd8 = strcmp(iter.value, "SIMD8") == 0;
522 } else if (strcmp(iter.name, "Dispatch Enable") == 0) {
523 is_simd8 = strcmp(iter.value, "SIMD8") == 0;
524 } else if (strcmp(iter.name, "Enable") == 0) {
525 is_enabled = iter.raw_value;
526 }
527 }
528
529 const char *type =
530 strcmp(inst->name, "VS_STATE") == 0 ? "vertex shader" :
531 strcmp(inst->name, "GS_STATE") == 0 ? "geometry shader" :
532 strcmp(inst->name, "SF_STATE") == 0 ? "strips and fans shader" :
533 strcmp(inst->name, "CLIP_STATE") == 0 ? "clip shader" :
534 strcmp(inst->name, "3DSTATE_DS") == 0 ? "tessellation evaluation shader" :
535 strcmp(inst->name, "3DSTATE_HS") == 0 ? "tessellation control shader" :
536 strcmp(inst->name, "3DSTATE_VS") == 0 ? (is_simd8 ? "SIMD8 vertex shader" : "vec4 vertex shader") :
537 strcmp(inst->name, "3DSTATE_GS") == 0 ? (is_simd8 ? "SIMD8 geometry shader" : "vec4 geometry shader") :
538 NULL;
539
540 if (is_enabled) {
541 ctx_disassemble_program(ctx, ksp, type);
542 fprintf(ctx->fp, "\n");
543 }
544 }
545
546 static void
decode_ps_kernels(struct gen_batch_decode_ctx * ctx,const uint32_t * p)547 decode_ps_kernels(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
548 {
549 struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
550
551 uint64_t ksp[3] = {0, 0, 0};
552 bool enabled[3] = {false, false, false};
553
554 struct gen_field_iterator iter;
555 gen_field_iterator_init(&iter, inst, p, 0, false);
556 while (gen_field_iterator_next(&iter)) {
557 if (strncmp(iter.name, "Kernel Start Pointer ",
558 strlen("Kernel Start Pointer ")) == 0) {
559 int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
560 ksp[idx] = strtol(iter.value, NULL, 16);
561 } else if (strcmp(iter.name, "8 Pixel Dispatch Enable") == 0) {
562 enabled[0] = strcmp(iter.value, "true") == 0;
563 } else if (strcmp(iter.name, "16 Pixel Dispatch Enable") == 0) {
564 enabled[1] = strcmp(iter.value, "true") == 0;
565 } else if (strcmp(iter.name, "32 Pixel Dispatch Enable") == 0) {
566 enabled[2] = strcmp(iter.value, "true") == 0;
567 }
568 }
569
570 /* Reorder KSPs to be [8, 16, 32] instead of the hardware order. */
571 if (enabled[0] + enabled[1] + enabled[2] == 1) {
572 if (enabled[1]) {
573 ksp[1] = ksp[0];
574 ksp[0] = 0;
575 } else if (enabled[2]) {
576 ksp[2] = ksp[0];
577 ksp[0] = 0;
578 }
579 } else {
580 uint64_t tmp = ksp[1];
581 ksp[1] = ksp[2];
582 ksp[2] = tmp;
583 }
584
585 if (enabled[0])
586 ctx_disassemble_program(ctx, ksp[0], "SIMD8 fragment shader");
587 if (enabled[1])
588 ctx_disassemble_program(ctx, ksp[1], "SIMD16 fragment shader");
589 if (enabled[2])
590 ctx_disassemble_program(ctx, ksp[2], "SIMD32 fragment shader");
591
592 if (enabled[0] || enabled[1] || enabled[2])
593 fprintf(ctx->fp, "\n");
594 }
595
596 static void
decode_3dstate_constant_all(struct gen_batch_decode_ctx * ctx,const uint32_t * p)597 decode_3dstate_constant_all(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
598 {
599 struct gen_group *inst =
600 gen_spec_find_instruction(ctx->spec, ctx->engine, p);
601 struct gen_group *body =
602 gen_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_ALL_DATA");
603
604 uint32_t read_length[4];
605 struct gen_batch_decode_bo buffer[4];
606 memset(buffer, 0, sizeof(buffer));
607
608 struct gen_field_iterator outer;
609 gen_field_iterator_init(&outer, inst, p, 0, false);
610 int idx = 0;
611 while (gen_field_iterator_next(&outer)) {
612 if (outer.struct_desc != body)
613 continue;
614
615 struct gen_field_iterator iter;
616 gen_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
617 0, false);
618 while (gen_field_iterator_next(&iter)) {
619 if (!strcmp(iter.name, "Pointer To Constant Buffer")) {
620 buffer[idx] = ctx_get_bo(ctx, true, iter.raw_value);
621 } else if (!strcmp(iter.name, "Constant Buffer Read Length")) {
622 read_length[idx] = iter.raw_value;
623 }
624 }
625 idx++;
626 }
627
628 for (int i = 0; i < 4; i++) {
629 if (read_length[i] == 0 || buffer[i].map == NULL)
630 continue;
631
632 unsigned size = read_length[i] * 32;
633 fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
634
635 ctx_print_buffer(ctx, buffer[i], size, 0, -1);
636 }
637 }
638
639 static void
decode_3dstate_constant(struct gen_batch_decode_ctx * ctx,const uint32_t * p)640 decode_3dstate_constant(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
641 {
642 struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
643 struct gen_group *body =
644 gen_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");
645
646 uint32_t read_length[4] = {0};
647 uint64_t read_addr[4];
648
649 struct gen_field_iterator outer;
650 gen_field_iterator_init(&outer, inst, p, 0, false);
651 while (gen_field_iterator_next(&outer)) {
652 if (outer.struct_desc != body)
653 continue;
654
655 struct gen_field_iterator iter;
656 gen_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
657 0, false);
658
659 while (gen_field_iterator_next(&iter)) {
660 int idx;
661 if (sscanf(iter.name, "Read Length[%d]", &idx) == 1) {
662 read_length[idx] = iter.raw_value;
663 } else if (sscanf(iter.name, "Buffer[%d]", &idx) == 1) {
664 read_addr[idx] = iter.raw_value;
665 }
666 }
667
668 for (int i = 0; i < 4; i++) {
669 if (read_length[i] == 0)
670 continue;
671
672 struct gen_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr[i]);
673 if (!buffer.map) {
674 fprintf(ctx->fp, "constant buffer %d unavailable\n", i);
675 continue;
676 }
677
678 unsigned size = read_length[i] * 32;
679 fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
680
681 ctx_print_buffer(ctx, buffer, size, 0, -1);
682 }
683 }
684 }
685
686 static void
decode_gen6_3dstate_binding_table_pointers(struct gen_batch_decode_ctx * ctx,const uint32_t * p)687 decode_gen6_3dstate_binding_table_pointers(struct gen_batch_decode_ctx *ctx,
688 const uint32_t *p)
689 {
690 fprintf(ctx->fp, "VS Binding Table:\n");
691 dump_binding_table(ctx, p[1], -1);
692
693 fprintf(ctx->fp, "GS Binding Table:\n");
694 dump_binding_table(ctx, p[2], -1);
695
696 fprintf(ctx->fp, "PS Binding Table:\n");
697 dump_binding_table(ctx, p[3], -1);
698 }
699
/* Dump the binding table whose offset is in dword 1 of the instruction at
 * `p`, with the entry count left for dump_binding_table() to work out.
 */
static void
decode_3dstate_binding_table_pointers(struct gen_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   const uint32_t table_offset = p[1];

   dump_binding_table(ctx, table_offset, -1);
}
706
/* Dump the sampler states whose offset is in dword 1 of the instruction at
 * `p`, with the sampler count left for dump_samplers() to work out.
 */
static void
decode_3dstate_sampler_state_pointers(struct gen_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   const uint32_t sampler_offset = p[1];

   dump_samplers(ctx, sampler_offset, -1);
}
713
/* Dump the three sets of sampler states whose offsets are in dwords 1, 2
 * and 3 of the instruction at `p`, in that order.
 */
static void
decode_3dstate_sampler_state_pointers_gen6(struct gen_batch_decode_ctx *ctx,
                                           const uint32_t *p)
{
   for (int dw = 1; dw <= 3; dw++)
      dump_samplers(ctx, p[dw], -1);
}
722
/* Return true when `str` ends with the suffix `end`.
 *
 * An empty suffix always matches.  Lengths are compared as size_t so the
 * difference is never narrowed to int.
 */
static bool
str_ends_with(const char *str, const char *end)
{
   const size_t str_len = strlen(str);
   const size_t end_len = strlen(end);

   if (end_len > str_len)
      return false;

   return strcmp(str + (str_len - end_len), end) == 0;
}
732
733 static void
decode_dynamic_state_pointers(struct gen_batch_decode_ctx * ctx,const char * struct_type,const uint32_t * p,int count)734 decode_dynamic_state_pointers(struct gen_batch_decode_ctx *ctx,
735 const char *struct_type, const uint32_t *p,
736 int count)
737 {
738 struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
739
740 uint32_t state_offset = 0;
741
742 struct gen_field_iterator iter;
743 gen_field_iterator_init(&iter, inst, p, 0, false);
744 while (gen_field_iterator_next(&iter)) {
745 if (str_ends_with(iter.name, "Pointer")) {
746 state_offset = iter.raw_value;
747 break;
748 }
749 }
750
751 uint64_t state_addr = ctx->dynamic_base + state_offset;
752 struct gen_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
753 const void *state_map = bo.map;
754
755 if (state_map == NULL) {
756 fprintf(ctx->fp, " dynamic %s state unavailable\n", struct_type);
757 return;
758 }
759
760 struct gen_group *state = gen_spec_find_struct(ctx->spec, struct_type);
761 if (strcmp(struct_type, "BLEND_STATE") == 0) {
762 /* Blend states are different from the others because they have a header
763 * struct called BLEND_STATE which is followed by a variable number of
764 * BLEND_STATE_ENTRY structs.
765 */
766 fprintf(ctx->fp, "%s\n", struct_type);
767 ctx_print_group(ctx, state, state_addr, state_map);
768
769 state_addr += state->dw_length * 4;
770 state_map += state->dw_length * 4;
771
772 struct_type = "BLEND_STATE_ENTRY";
773 state = gen_spec_find_struct(ctx->spec, struct_type);
774 }
775
776 count = update_count(ctx, ctx->dynamic_base + state_offset,
777 ctx->dynamic_base, state->dw_length, count);
778
779 for (int i = 0; i < count; i++) {
780 fprintf(ctx->fp, "%s %d\n", struct_type, i);
781 ctx_print_group(ctx, state, state_addr, state_map);
782
783 state_addr += state->dw_length * 4;
784 state_map += state->dw_length * 4;
785 }
786 }
787
/* Decode the CC_VIEWPORT structures referenced by the instruction at `p`,
 * assuming 4 of them when no size information is available.
 */
static void
decode_3dstate_viewport_state_pointers_cc(struct gen_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   const int fallback_count = 4;

   decode_dynamic_state_pointers(ctx, "CC_VIEWPORT", p, fallback_count);
}
794
/* Decode the SF_CLIP_VIEWPORT structures referenced by the instruction at
 * `p`, assuming 4 of them when no size information is available.
 */
static void
decode_3dstate_viewport_state_pointers_sf_clip(struct gen_batch_decode_ctx *ctx,
                                               const uint32_t *p)
{
   const int fallback_count = 4;

   decode_dynamic_state_pointers(ctx, "SF_CLIP_VIEWPORT", p, fallback_count);
}
801
/* Decode the BLEND_STATE referenced by the instruction at `p`, assuming a
 * single entry when no size information is available.
 */
static void
decode_3dstate_blend_state_pointers(struct gen_batch_decode_ctx *ctx,
                                    const uint32_t *p)
{
   const int fallback_count = 1;

   decode_dynamic_state_pointers(ctx, "BLEND_STATE", p, fallback_count);
}
808
/* Decode the COLOR_CALC_STATE referenced by the instruction at `p`,
 * assuming a single element when no size information is available.
 */
static void
decode_3dstate_cc_state_pointers(struct gen_batch_decode_ctx *ctx,
                                 const uint32_t *p)
{
   const int fallback_count = 1;

   decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, fallback_count);
}
815
/* Decode the SCISSOR_RECT referenced by the instruction at `p`, assuming a
 * single element when no size information is available.
 */
static void
decode_3dstate_scissor_state_pointers(struct gen_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   const int fallback_count = 1;

   decode_dynamic_state_pointers(ctx, "SCISSOR_RECT", p, fallback_count);
}
822
/* Decode the SLICE_HASH_TABLE referenced by the instruction at `p`,
 * assuming a single element when no size information is available.
 */
static void
decode_3dstate_slice_table_state_pointers(struct gen_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   const int fallback_count = 1;

   decode_dynamic_state_pointers(ctx, "SLICE_HASH_TABLE", p, fallback_count);
}
829
830 static void
decode_load_register_imm(struct gen_batch_decode_ctx * ctx,const uint32_t * p)831 decode_load_register_imm(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
832 {
833 struct gen_group *reg = gen_spec_find_register(ctx->spec, p[1]);
834
835 if (reg != NULL) {
836 fprintf(ctx->fp, "register %s (0x%x): 0x%x\n",
837 reg->name, reg->register_offset, p[2]);
838 ctx_print_group(ctx, reg, reg->register_offset, &p[2]);
839 }
840 }
841
842 static void
decode_vs_state(struct gen_batch_decode_ctx * ctx,uint32_t offset)843 decode_vs_state(struct gen_batch_decode_ctx *ctx, uint32_t offset)
844 {
845 struct gen_group *strct =
846 gen_spec_find_struct(ctx->spec, "VS_STATE");
847 if (strct == NULL) {
848 fprintf(ctx->fp, "did not find VS_STATE info\n");
849 return;
850 }
851
852 struct gen_batch_decode_bo bind_bo =
853 ctx_get_bo(ctx, true, offset);
854
855 if (bind_bo.map == NULL) {
856 fprintf(ctx->fp, " vs state unavailable\n");
857 return;
858 }
859
860 ctx_print_group(ctx, strct, offset, bind_bo.map);
861 }
862
863
864 static void
decode_clip_state(struct gen_batch_decode_ctx * ctx,uint32_t offset)865 decode_clip_state(struct gen_batch_decode_ctx *ctx, uint32_t offset)
866 {
867 struct gen_group *strct =
868 gen_spec_find_struct(ctx->spec, "CLIP_STATE");
869 if (strct == NULL) {
870 fprintf(ctx->fp, "did not find CLIP_STATE info\n");
871 return;
872 }
873
874 struct gen_batch_decode_bo bind_bo =
875 ctx_get_bo(ctx, true, offset);
876
877 if (bind_bo.map == NULL) {
878 fprintf(ctx->fp, " clip state unavailable\n");
879 return;
880 }
881
882 ctx_print_group(ctx, strct, offset, bind_bo.map);
883
884 struct gen_group *vp_strct =
885 gen_spec_find_struct(ctx->spec, "CLIP_VIEWPORT");
886 if (vp_strct == NULL) {
887 fprintf(ctx->fp, "did not find CLIP_VIEWPORT info\n");
888 return;
889 }
890 uint32_t clip_vp_offset = ((uint32_t *)bind_bo.map)[6] & ~0x3;
891 struct gen_batch_decode_bo vp_bo =
892 ctx_get_bo(ctx, true, clip_vp_offset);
893 if (vp_bo.map == NULL) {
894 fprintf(ctx->fp, " clip vp state unavailable\n");
895 return;
896 }
897 ctx_print_group(ctx, vp_strct, clip_vp_offset, vp_bo.map);
898 }
899
900 static void
decode_sf_state(struct gen_batch_decode_ctx * ctx,uint32_t offset)901 decode_sf_state(struct gen_batch_decode_ctx *ctx, uint32_t offset)
902 {
903 struct gen_group *strct =
904 gen_spec_find_struct(ctx->spec, "SF_STATE");
905 if (strct == NULL) {
906 fprintf(ctx->fp, "did not find SF_STATE info\n");
907 return;
908 }
909
910 struct gen_batch_decode_bo bind_bo =
911 ctx_get_bo(ctx, true, offset);
912
913 if (bind_bo.map == NULL) {
914 fprintf(ctx->fp, " sf state unavailable\n");
915 return;
916 }
917
918 ctx_print_group(ctx, strct, offset, bind_bo.map);
919
920 struct gen_group *vp_strct =
921 gen_spec_find_struct(ctx->spec, "SF_VIEWPORT");
922 if (vp_strct == NULL) {
923 fprintf(ctx->fp, "did not find SF_VIEWPORT info\n");
924 return;
925 }
926
927 uint32_t sf_vp_offset = ((uint32_t *)bind_bo.map)[5] & ~0x3;
928 struct gen_batch_decode_bo vp_bo =
929 ctx_get_bo(ctx, true, sf_vp_offset);
930 if (vp_bo.map == NULL) {
931 fprintf(ctx->fp, " sf vp state unavailable\n");
932 return;
933 }
934 ctx_print_group(ctx, vp_strct, sf_vp_offset, vp_bo.map);
935 }
936
937 static void
decode_wm_state(struct gen_batch_decode_ctx * ctx,uint32_t offset)938 decode_wm_state(struct gen_batch_decode_ctx *ctx, uint32_t offset)
939 {
940 struct gen_group *strct =
941 gen_spec_find_struct(ctx->spec, "WM_STATE");
942 if (strct == NULL) {
943 fprintf(ctx->fp, "did not find WM_STATE info\n");
944 return;
945 }
946
947 struct gen_batch_decode_bo bind_bo =
948 ctx_get_bo(ctx, true, offset);
949
950 if (bind_bo.map == NULL) {
951 fprintf(ctx->fp, " wm state unavailable\n");
952 return;
953 }
954
955 ctx_print_group(ctx, strct, offset, bind_bo.map);
956 }
957
958 static void
decode_cc_state(struct gen_batch_decode_ctx * ctx,uint32_t offset)959 decode_cc_state(struct gen_batch_decode_ctx *ctx, uint32_t offset)
960 {
961 struct gen_group *strct =
962 gen_spec_find_struct(ctx->spec, "COLOR_CALC_STATE");
963 if (strct == NULL) {
964 fprintf(ctx->fp, "did not find COLOR_CALC_STATE info\n");
965 return;
966 }
967
968 struct gen_batch_decode_bo bind_bo =
969 ctx_get_bo(ctx, true, offset);
970
971 if (bind_bo.map == NULL) {
972 fprintf(ctx->fp, " cc state unavailable\n");
973 return;
974 }
975
976 ctx_print_group(ctx, strct, offset, bind_bo.map);
977
978 struct gen_group *vp_strct =
979 gen_spec_find_struct(ctx->spec, "CC_VIEWPORT");
980 if (vp_strct == NULL) {
981 fprintf(ctx->fp, "did not find CC_VIEWPORT info\n");
982 return;
983 }
984 uint32_t cc_vp_offset = ((uint32_t *)bind_bo.map)[4] & ~0x3;
985 struct gen_batch_decode_bo vp_bo =
986 ctx_get_bo(ctx, true, cc_vp_offset);
987 if (vp_bo.map == NULL) {
988 fprintf(ctx->fp, " cc vp state unavailable\n");
989 return;
990 }
991 ctx_print_group(ctx, vp_strct, cc_vp_offset, vp_bo.map);
992 }
993 static void
decode_pipelined_pointers(struct gen_batch_decode_ctx * ctx,const uint32_t * p)994 decode_pipelined_pointers(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
995 {
996 fprintf(ctx->fp, "VS State Table:\n");
997 decode_vs_state(ctx, p[1]);
998 fprintf(ctx->fp, "Clip State Table:\n");
999 decode_clip_state(ctx, p[3] & ~1);
1000 fprintf(ctx->fp, "SF State Table:\n");
1001 decode_sf_state(ctx, p[4]);
1002 fprintf(ctx->fp, "WM State Table:\n");
1003 decode_wm_state(ctx, p[5]);
1004 fprintf(ctx->fp, "CC State Table:\n");
1005 decode_cc_state(ctx, p[6]);
1006 }
1007
1008 struct custom_decoder {
1009 const char *cmd_name;
1010 void (*decode)(struct gen_batch_decode_ctx *ctx, const uint32_t *p);
1011 } custom_decoders[] = {
1012 { "STATE_BASE_ADDRESS", handle_state_base_address },
1013 { "MEDIA_INTERFACE_DESCRIPTOR_LOAD", handle_media_interface_descriptor_load },
1014 { "3DSTATE_VERTEX_BUFFERS", handle_3dstate_vertex_buffers },
1015 { "3DSTATE_INDEX_BUFFER", handle_3dstate_index_buffer },
1016 { "3DSTATE_VS", decode_single_ksp },
1017 { "3DSTATE_GS", decode_single_ksp },
1018 { "3DSTATE_DS", decode_single_ksp },
1019 { "3DSTATE_HS", decode_single_ksp },
1020 { "3DSTATE_PS", decode_ps_kernels },
1021 { "3DSTATE_WM", decode_ps_kernels },
1022 { "3DSTATE_CONSTANT_VS", decode_3dstate_constant },
1023 { "3DSTATE_CONSTANT_GS", decode_3dstate_constant },
1024 { "3DSTATE_CONSTANT_PS", decode_3dstate_constant },
1025 { "3DSTATE_CONSTANT_HS", decode_3dstate_constant },
1026 { "3DSTATE_CONSTANT_DS", decode_3dstate_constant },
1027 { "3DSTATE_CONSTANT_ALL", decode_3dstate_constant_all },
1028
1029 { "3DSTATE_BINDING_TABLE_POINTERS", decode_gen6_3dstate_binding_table_pointers },
1030 { "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers },
1031 { "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers },
1032 { "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers },
1033 { "3DSTATE_BINDING_TABLE_POINTERS_GS", decode_3dstate_binding_table_pointers },
1034 { "3DSTATE_BINDING_TABLE_POINTERS_PS", decode_3dstate_binding_table_pointers },
1035
1036 { "3DSTATE_SAMPLER_STATE_POINTERS_VS", decode_3dstate_sampler_state_pointers },
1037 { "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers },
1038 { "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers },
1039 { "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers },
1040 { "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers },
1041 { "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gen6 },
1042
1043 { "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc },
1044 { "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
1045 { "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers },
1046 { "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers },
1047 { "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers },
1048 { "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers },
1049 { "MI_LOAD_REGISTER_IMM", decode_load_register_imm },
1050 { "3DSTATE_PIPELINED_POINTERS", decode_pipelined_pointers }
1051 };
1052
1053 void
gen_print_batch(struct gen_batch_decode_ctx * ctx,const uint32_t * batch,uint32_t batch_size,uint64_t batch_addr,bool from_ring)1054 gen_print_batch(struct gen_batch_decode_ctx *ctx,
1055 const uint32_t *batch, uint32_t batch_size,
1056 uint64_t batch_addr, bool from_ring)
1057 {
1058 const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
1059 int length;
1060 struct gen_group *inst;
1061 const char *reset_color = ctx->flags & GEN_BATCH_DECODE_IN_COLOR ? NORMAL : "";
1062
1063 if (ctx->n_batch_buffer_start >= 100) {
1064 fprintf(ctx->fp, "%s0x%08"PRIx64": Max batch buffer jumps exceeded%s\n",
1065 (ctx->flags & GEN_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
1066 (ctx->flags & GEN_BATCH_DECODE_OFFSETS) ? batch_addr : 0,
1067 reset_color);
1068 return;
1069 }
1070
1071 ctx->n_batch_buffer_start++;
1072
1073 for (p = batch; p < end; p += length) {
1074 inst = gen_ctx_find_instruction(ctx, p);
1075 length = gen_group_get_length(inst, p);
1076 assert(inst == NULL || length > 0);
1077 length = MAX2(1, length);
1078
1079 uint64_t offset;
1080 if (ctx->flags & GEN_BATCH_DECODE_OFFSETS)
1081 offset = batch_addr + ((char *)p - (char *)batch);
1082 else
1083 offset = 0;
1084
1085 if (inst == NULL) {
1086 fprintf(ctx->fp, "%s0x%08"PRIx64": unknown instruction %08x%s\n",
1087 (ctx->flags & GEN_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
1088 offset, p[0], reset_color);
1089 continue;
1090 }
1091
1092 const char *color;
1093 const char *inst_name = gen_group_get_name(inst);
1094 if (ctx->flags & GEN_BATCH_DECODE_IN_COLOR) {
1095 reset_color = NORMAL;
1096 if (ctx->flags & GEN_BATCH_DECODE_FULL) {
1097 if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 ||
1098 strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0)
1099 color = GREEN_HEADER;
1100 else
1101 color = BLUE_HEADER;
1102 } else {
1103 color = NORMAL;
1104 }
1105 } else {
1106 color = "";
1107 reset_color = "";
1108 }
1109
1110 fprintf(ctx->fp, "%s0x%08"PRIx64": 0x%08x: %-80s%s\n",
1111 color, offset, p[0], inst_name, reset_color);
1112
1113 if (ctx->flags & GEN_BATCH_DECODE_FULL) {
1114 ctx_print_group(ctx, inst, offset, p);
1115
1116 for (int i = 0; i < ARRAY_SIZE(custom_decoders); i++) {
1117 if (strcmp(inst_name, custom_decoders[i].cmd_name) == 0) {
1118 custom_decoders[i].decode(ctx, p);
1119 break;
1120 }
1121 }
1122 }
1123
1124 if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0) {
1125 uint64_t next_batch_addr = 0;
1126 bool ppgtt = false;
1127 bool second_level = false;
1128 struct gen_field_iterator iter;
1129 gen_field_iterator_init(&iter, inst, p, 0, false);
1130 while (gen_field_iterator_next(&iter)) {
1131 if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
1132 next_batch_addr = iter.raw_value;
1133 } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
1134 second_level = iter.raw_value;
1135 } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
1136 ppgtt = iter.raw_value;
1137 }
1138 }
1139
1140 struct gen_batch_decode_bo next_batch = ctx_get_bo(ctx, ppgtt, next_batch_addr);
1141
1142 if (next_batch.map == NULL) {
1143 fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
1144 next_batch_addr);
1145 } else {
1146 gen_print_batch(ctx, next_batch.map, next_batch.size,
1147 next_batch.addr, false);
1148 }
1149 if (second_level) {
1150 /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
1151 * like a subroutine call. Commands that come afterwards get
1152 * processed once the 2nd level batch buffer returns with
1153 * MI_BATCH_BUFFER_END.
1154 */
1155 continue;
1156 } else if (!from_ring) {
1157 /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
1158 * like a goto. Nothing after it will ever get processed. In
1159 * order to prevent the recursion from growing, we just reset the
1160 * loop and continue;
1161 */
1162 break;
1163 }
1164 } else if (strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0) {
1165 break;
1166 }
1167 }
1168
1169 ctx->n_batch_buffer_start--;
1170 }
1171