1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "intel_decoder.h"
25 #include "intel_decoder_private.h"
26
27 #include "util/macros.h"
28 #include "util/u_debug.h"
29 #include "util/u_dynarray.h"
30 #include "util/u_math.h" /* Needed for ROUND_DOWN_TO */
31
32 #include <string.h>
33
34 static const struct debug_control debug_control[] = {
35 { "color", INTEL_BATCH_DECODE_IN_COLOR },
36 { "full", INTEL_BATCH_DECODE_FULL },
37 { "offsets", INTEL_BATCH_DECODE_OFFSETS },
38 { "floats", INTEL_BATCH_DECODE_FLOATS },
39 { "surfaces", INTEL_BATCH_DECODE_SURFACES },
40 { "accumulate", INTEL_BATCH_DECODE_ACCUMULATE },
41 { "vb-data", INTEL_BATCH_DECODE_VB_DATA },
42 { NULL, 0 }
43 };
44
45 void
intel_batch_decode_ctx_init(struct intel_batch_decode_ctx * ctx,const struct intel_device_info * devinfo,FILE * fp,enum intel_batch_decode_flags flags,const char * xml_path,struct intel_batch_decode_bo (* get_bo)(void *,bool,uint64_t),unsigned (* get_state_size)(void *,uint64_t,uint64_t),void * user_data)46 intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx,
47 const struct intel_device_info *devinfo,
48 FILE *fp, enum intel_batch_decode_flags flags,
49 const char *xml_path,
50 struct intel_batch_decode_bo (*get_bo)(void *,
51 bool,
52 uint64_t),
53 unsigned (*get_state_size)(void *, uint64_t,
54 uint64_t),
55 void *user_data)
56 {
57 memset(ctx, 0, sizeof(*ctx));
58
59 ctx->devinfo = *devinfo;
60 ctx->get_bo = get_bo;
61 ctx->get_state_size = get_state_size;
62 ctx->user_data = user_data;
63 ctx->fp = fp;
64 ctx->flags = parse_enable_string(getenv("INTEL_DECODE"), flags, debug_control);
65 ctx->max_vbo_decoded_lines = -1; /* No limit! */
66 ctx->engine = INTEL_ENGINE_CLASS_RENDER;
67
68 if (xml_path == NULL)
69 ctx->spec = intel_spec_load(devinfo);
70 else
71 ctx->spec = intel_spec_load_from_path(devinfo, xml_path);
72
73 ctx->commands =
74 _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
75 ctx->stats =
76 _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
77
78 const char *filters = getenv("INTEL_DECODE_FILTERS");
79 if (filters != NULL) {
80 ctx->filters =
81 _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
82 do {
83 const char *term = filters;
84 if (strlen(term) == 0)
85 break;
86
87 filters = strstr(term, ",");
88
89 char *str = ralloc_strndup(ctx->filters, term,
90 filters != NULL ?
91 (filters - term) : strlen(term));
92 _mesa_hash_table_insert(ctx->filters, str, str);
93
94 if (filters == NULL)
95 break;
96
97 filters++;
98 } while (true);
99 }
100 }
101
102 void
intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx * ctx)103 intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx *ctx)
104 {
105 if (ctx->filters != NULL)
106 _mesa_hash_table_destroy(ctx->filters, NULL);
107 _mesa_hash_table_destroy(ctx->commands, NULL);
108 _mesa_hash_table_destroy(ctx->stats, NULL);
109 intel_spec_destroy(ctx->spec);
110 }
111
112 #define CSI "\e["
113 #define RED_COLOR CSI "31m"
114 #define BLUE_HEADER CSI "0;44m" CSI "1;37m"
115 #define GREEN_HEADER CSI "1;42m"
116 #define NORMAL CSI "0m"
117
118 static void
ctx_print_group(struct intel_batch_decode_ctx * ctx,const struct intel_group * group,uint64_t address,const void * map)119 ctx_print_group(struct intel_batch_decode_ctx *ctx,
120 const struct intel_group *group,
121 uint64_t address, const void *map)
122 {
123 intel_print_group(ctx->fp, group, address, map, 0,
124 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) != 0);
125 }
126
127 struct intel_batch_decode_bo
ctx_get_bo(struct intel_batch_decode_ctx * ctx,bool ppgtt,uint64_t addr)128 ctx_get_bo(struct intel_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr)
129 {
130 if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0)) {
131 /* On Broadwell and above, we have 48-bit addresses which consume two
132 * dwords. Some packets require that these get stored in a "canonical
133 * form" which means that bit 47 is sign-extended through the upper
134 * bits. In order to correctly handle those aub dumps, we need to mask
135 * off the top 16 bits.
136 */
137 addr &= (~0ull >> 16);
138 }
139
140 struct intel_batch_decode_bo bo = ctx->get_bo(ctx->user_data, ppgtt, addr);
141
142 if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0))
143 bo.addr &= (~0ull >> 16);
144
145 /* We may actually have an offset into the bo */
146 if (bo.map != NULL) {
147 assert(bo.addr <= addr);
148 uint64_t offset = addr - bo.addr;
149 bo.map += offset;
150 bo.addr += offset;
151 bo.size -= offset;
152 }
153
154 return bo;
155 }
156
157 static int
update_count(struct intel_batch_decode_ctx * ctx,uint64_t address,uint64_t base_address,unsigned element_dwords,unsigned guess)158 update_count(struct intel_batch_decode_ctx *ctx,
159 uint64_t address,
160 uint64_t base_address,
161 unsigned element_dwords,
162 unsigned guess)
163 {
164 unsigned size = 0;
165
166 if (ctx->get_state_size)
167 size = ctx->get_state_size(ctx->user_data, address, base_address);
168
169 if (size > 0)
170 return size / (sizeof(uint32_t) * element_dwords);
171
172 /* In the absence of any information, just guess arbitrarily. */
173 return guess;
174 }
175
176 static inline void
ctx_disassemble_program(struct intel_batch_decode_ctx * ctx,uint32_t ksp,const char * short_name,const char * name)177 ctx_disassemble_program(struct intel_batch_decode_ctx *ctx,
178 uint32_t ksp,
179 const char *short_name,
180 const char *name)
181 {
182 ctx->disassemble_program(ctx, ksp, short_name, name);
183 }
184
/* Heuristic to determine whether a uint32_t is probably actually a float
 * (http://stackoverflow.com/a/2953466)
 */
static bool
probably_float(uint32_t bits)
{
   /* Split out the IEEE-754 single-precision fields. */
   const int exp = (int)((bits >> 23) & 0xff) - 127;
   const uint32_t mant = bits & 0x007fffff;

   /* +- 0.0 */
   if (exp == -127 && mant == 0)
      return true;

   /* +- 1 billionth to 1 billion */
   if (exp >= -30 && exp <= 30)
      return true;

   /* some value with only a few binary digits */
   return (mant & 0xffff) == 0;
}
209
/* Dump up to read_length bytes of a BO to ctx->fp as dwords.
 *
 * Rows hold at most 8 dwords; when pitch is non-zero a new row also starts
 * every `pitch` bytes so one printed line corresponds to one record.
 * max_lines >= 0 caps the number of complete rows printed; -1 means no
 * limit.  With INTEL_BATCH_DECODE_FLOATS set, dwords that look like floats
 * (per probably_float()) are printed as floats.
 */
static void
ctx_print_buffer(struct intel_batch_decode_ctx *ctx,
                 struct intel_batch_decode_bo bo,
                 uint32_t read_length,
                 uint32_t pitch,
                 int max_lines)
{
   /* Clamp to the BO's mapped size and round down to whole dwords. */
   const uint32_t *dw_end =
      bo.map + ROUND_DOWN_TO(MIN2(bo.size, read_length), 4);

   /* line_count starts at -1 so the first row is only counted against
    * max_lines once a full line has actually been emitted.
    */
   int column_count = 0, pitch_col_count = 0, line_count = -1;
   for (const uint32_t *dw = bo.map; dw < dw_end; dw++) {
      /* Break the row at a pitch boundary or after 8 columns. */
      if (pitch_col_count * 4 == pitch || column_count == 8) {
         fprintf(ctx->fp, "\n");
         column_count = 0;
         if (pitch_col_count * 4 == pitch)
            pitch_col_count = 0;
         line_count++;

         if (max_lines >= 0 && line_count >= max_lines)
            break;
      }
      fprintf(ctx->fp, column_count == 0 ? " " : " ");

      /* NOTE(review): type-punning via *(float *)dw relies on the
       * compiler tolerating the aliasing here — pre-existing pattern.
       */
      if ((ctx->flags & INTEL_BATCH_DECODE_FLOATS) && probably_float(*dw))
         fprintf(ctx->fp, " %8.2f", *(float *) dw);
      else
         fprintf(ctx->fp, " 0x%08x", *dw);

      column_count++;
      pitch_col_count++;
   }
   fprintf(ctx->fp, "\n");
}
244
/* Look up the instruction descriptor for the packet at p, using the engine
 * class currently selected on the decoder context.
 */
static struct intel_group *
intel_ctx_find_instruction(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   return intel_spec_find_instruction(ctx->spec, ctx->engine, p);
}
250
251 static void
handle_state_base_address(struct intel_batch_decode_ctx * ctx,const uint32_t * p)252 handle_state_base_address(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
253 {
254 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
255
256 struct intel_field_iterator iter;
257 intel_field_iterator_init(&iter, inst, p, 0, false);
258
259 uint64_t surface_base = 0, dynamic_base = 0, instruction_base = 0;
260 bool surface_modify = 0, dynamic_modify = 0, instruction_modify = 0;
261
262 while (intel_field_iterator_next(&iter)) {
263 if (strcmp(iter.name, "Surface State Base Address") == 0) {
264 surface_base = iter.raw_value;
265 } else if (strcmp(iter.name, "Dynamic State Base Address") == 0) {
266 dynamic_base = iter.raw_value;
267 } else if (strcmp(iter.name, "Instruction Base Address") == 0) {
268 instruction_base = iter.raw_value;
269 } else if (strcmp(iter.name, "Surface State Base Address Modify Enable") == 0) {
270 surface_modify = iter.raw_value;
271 } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) {
272 dynamic_modify = iter.raw_value;
273 } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) {
274 instruction_modify = iter.raw_value;
275 }
276 }
277
278 if (dynamic_modify)
279 ctx->dynamic_base = dynamic_base;
280
281 if (surface_modify)
282 ctx->surface_base = surface_base;
283
284 if (instruction_modify)
285 ctx->instruction_base = instruction_base;
286 }
287
288 static void
handle_binding_table_pool_alloc(struct intel_batch_decode_ctx * ctx,const uint32_t * p)289 handle_binding_table_pool_alloc(struct intel_batch_decode_ctx *ctx,
290 const uint32_t *p)
291 {
292 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
293
294 struct intel_field_iterator iter;
295 intel_field_iterator_init(&iter, inst, p, 0, false);
296
297 uint64_t bt_pool_base = 0;
298 bool bt_pool_enable = false;
299
300 while (intel_field_iterator_next(&iter)) {
301 if (strcmp(iter.name, "Binding Table Pool Base Address") == 0) {
302 bt_pool_base = iter.raw_value;
303 } else if (strcmp(iter.name, "Binding Table Pool Enable") == 0) {
304 bt_pool_enable = iter.raw_value;
305 }
306 }
307
308 if (bt_pool_enable || ctx->devinfo.verx10 >= 125) {
309 ctx->bt_pool_base = bt_pool_base;
310 } else {
311 ctx->bt_pool_base = 0;
312 }
313 }
314
/* Dump the binding table at `offset` and, when INTEL_BATCH_DECODE_SURFACES
 * is set, each RENDER_SURFACE_STATE it points at.  A negative `count`
 * means "unknown": the driver callback is asked via update_count(), with a
 * fallback guess of 32 entries.
 */
static void
dump_binding_table(struct intel_batch_decode_ctx *ctx,
                   uint32_t offset, int count)
{
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "RENDER_SURFACE_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find RENDER_SURFACE_STATE info\n");
      return;
   }

   /* Most platforms use a 16-bit pointer with 32B alignment in bits 15:5. */
   uint32_t btp_alignment = 32;
   uint32_t btp_pointer_bits = 16;

   if (ctx->devinfo.verx10 >= 125) {
      /* The pointer is now 21-bit with 32B alignment in bits 20:5. */
      btp_pointer_bits = 21;
   } else if (ctx->use_256B_binding_tables) {
      /* When 256B binding tables are enabled, we have to shift the offset
       * which is stored in bits 15:5 but interpreted as bits 18:8 of the
       * actual offset. The effective pointer is 19-bit with 256B alignment.
       */
      offset <<= 3;
      btp_pointer_bits = 19;
      btp_alignment = 256;
   }

   /* The table lives in the binding table pool when one is bound (see
    * handle_binding_table_pool_alloc), otherwise relative to surface base.
    */
   const uint64_t bt_pool_base = ctx->bt_pool_base ? ctx->bt_pool_base :
                                                     ctx->surface_base;

   if (count < 0) {
      count = update_count(ctx, bt_pool_base + offset,
                           bt_pool_base, 1, 32);
   }

   /* Reject pointers that violate the platform's alignment or width. */
   if (offset % btp_alignment != 0 || offset >= (1u << btp_pointer_bits)) {
      fprintf(ctx->fp, " invalid binding table pointer\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, bt_pool_base + offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, " binding table unavailable\n");
      return;
   }

   const uint32_t *pointers = bind_bo.map;
   for (int i = 0; i < count; i++) {
      /* Stop early if the (possibly guessed) count runs past the BO. */
      if (((uintptr_t)&pointers[i] >= ((uintptr_t)bind_bo.map + bind_bo.size)))
         break;

      /* Surface state entries are relative to Surface State Base Address. */
      uint64_t addr = ctx->surface_base + pointers[i];
      struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
      uint32_t size = strct->dw_length * 4;

      /* Each entry must be 32B aligned and fully inside a mapped BO. */
      if (pointers[i] % 32 != 0 ||
          addr < bo.addr || addr + size > bo.addr + bo.size) {
         fprintf(ctx->fp, "pointer %u: 0x%08x <not valid>\n", i, pointers[i]);
         continue;
      }

      fprintf(ctx->fp, "pointer %u: 0x%08x\n", i, pointers[i]);
      if (ctx->flags & INTEL_BATCH_DECODE_SURFACES)
         ctx_print_group(ctx, strct, addr, bo.map + (addr - bo.addr));
   }
}
384
385 static void
dump_samplers(struct intel_batch_decode_ctx * ctx,uint32_t offset,int count)386 dump_samplers(struct intel_batch_decode_ctx *ctx, uint32_t offset, int count)
387 {
388 struct intel_group *strct = intel_spec_find_struct(ctx->spec, "SAMPLER_STATE");
389 uint64_t state_addr = ctx->dynamic_base + offset;
390
391 assert(count > 0);
392
393 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
394 const void *state_map = bo.map;
395
396 if (state_map == NULL) {
397 fprintf(ctx->fp, " samplers unavailable\n");
398 return;
399 }
400
401 if (offset % 32 != 0) {
402 fprintf(ctx->fp, " invalid sampler state pointer\n");
403 return;
404 }
405
406 const unsigned sampler_state_size = strct->dw_length * 4;
407
408 if (count * sampler_state_size >= bo.size) {
409 fprintf(ctx->fp, " sampler state ends after bo ends\n");
410 assert(!"sampler state ends after bo ends");
411 return;
412 }
413
414 for (int i = 0; i < count; i++) {
415 fprintf(ctx->fp, "sampler state %d\n", i);
416 if (ctx->flags & INTEL_BATCH_DECODE_SAMPLERS)
417 ctx_print_group(ctx, strct, state_addr, state_map);
418 state_addr += sampler_state_size;
419 state_map += sampler_state_size;
420 }
421 }
422
423 static void
handle_interface_descriptor_data(struct intel_batch_decode_ctx * ctx,struct intel_group * desc,const uint32_t * p)424 handle_interface_descriptor_data(struct intel_batch_decode_ctx *ctx,
425 struct intel_group *desc, const uint32_t *p)
426 {
427 uint64_t ksp = 0;
428 uint32_t sampler_offset = 0, sampler_count = 0;
429 uint32_t binding_table_offset = 0, binding_entry_count = 0;
430
431 struct intel_field_iterator iter;
432 intel_field_iterator_init(&iter, desc, p, 0, false);
433 while (intel_field_iterator_next(&iter)) {
434 if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
435 ksp = strtoll(iter.value, NULL, 16);
436 } else if (strcmp(iter.name, "Sampler State Pointer") == 0) {
437 sampler_offset = strtol(iter.value, NULL, 16);
438 } else if (strcmp(iter.name, "Sampler Count") == 0) {
439 sampler_count = strtol(iter.value, NULL, 10);
440 } else if (strcmp(iter.name, "Binding Table Pointer") == 0) {
441 binding_table_offset = strtol(iter.value, NULL, 16);
442 } else if (strcmp(iter.name, "Binding Table Entry Count") == 0) {
443 binding_entry_count = strtol(iter.value, NULL, 10);
444 }
445 }
446
447 ctx_disassemble_program(ctx, ksp, "CS", "compute shader");
448 fprintf(ctx->fp, "\n");
449
450 if (sampler_count)
451 dump_samplers(ctx, sampler_offset, sampler_count);
452 if (binding_entry_count)
453 dump_binding_table(ctx, binding_table_offset, binding_entry_count);
454 }
455
456 static void
handle_media_interface_descriptor_load(struct intel_batch_decode_ctx * ctx,const uint32_t * p)457 handle_media_interface_descriptor_load(struct intel_batch_decode_ctx *ctx,
458 const uint32_t *p)
459 {
460 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
461 struct intel_group *desc =
462 intel_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
463
464 struct intel_field_iterator iter;
465 intel_field_iterator_init(&iter, inst, p, 0, false);
466 uint32_t descriptor_offset = 0;
467 int descriptor_count = 0;
468 while (intel_field_iterator_next(&iter)) {
469 if (strcmp(iter.name, "Interface Descriptor Data Start Address") == 0) {
470 descriptor_offset = strtol(iter.value, NULL, 16);
471 } else if (strcmp(iter.name, "Interface Descriptor Total Length") == 0) {
472 descriptor_count =
473 strtol(iter.value, NULL, 16) / (desc->dw_length * 4);
474 }
475 }
476
477 uint64_t desc_addr = ctx->dynamic_base + descriptor_offset;
478 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, desc_addr);
479 const void *desc_map = bo.map;
480
481 if (desc_map == NULL) {
482 fprintf(ctx->fp, " interface descriptors unavailable\n");
483 return;
484 }
485
486 for (int i = 0; i < descriptor_count; i++) {
487 fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset);
488
489 ctx_print_group(ctx, desc, desc_addr, desc_map);
490
491 handle_interface_descriptor_data(ctx, desc, desc_map);
492
493 desc_map += desc->dw_length;
494 desc_addr += desc->dw_length * 4;
495 }
496 }
497
/* Decode COMPUTE_WALKER: descend into its "body" sub-struct and hand the
 * embedded Interface Descriptor off for kernel/sampler/binding-table dumps.
 */
static void
handle_compute_walker(struct intel_batch_decode_ctx *ctx,
                      const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (strcmp(iter.name, "body") == 0) {
         /* Re-seat the same iterator inside the body struct: iteration
          * then continues over the body's fields rather than the outer
          * packet's.  Fields of the outer packet after "body" are
          * intentionally not visited.
          */
         intel_field_iterator_init(&iter, iter.struct_desc,
                                   &iter.p[iter.start_bit / 32],
                                   0, false);
      } else if (strcmp(iter.name, "Interface Descriptor") == 0) {
         handle_interface_descriptor_data(ctx, iter.struct_desc,
                                          &iter.p[iter.start_bit / 32]);
      }
   }
}
517
518 static void
handle_media_curbe_load(struct intel_batch_decode_ctx * ctx,const uint32_t * p)519 handle_media_curbe_load(struct intel_batch_decode_ctx *ctx,
520 const uint32_t *p)
521 {
522 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
523
524 struct intel_field_iterator iter;
525 intel_field_iterator_init(&iter, inst, p, 0, false);
526
527 uint32_t dynamic_state_offset = 0;
528 uint32_t dynamic_state_length = 0;
529
530 while (intel_field_iterator_next(&iter)) {
531 if (strcmp(iter.name, "CURBE Data Start Address") == 0) {
532 dynamic_state_offset = iter.raw_value;
533 } else if (strcmp(iter.name, "CURBE Total Data Length") == 0) {
534 dynamic_state_length = iter.raw_value;
535 }
536 }
537
538 if (dynamic_state_length > 0) {
539 struct intel_batch_decode_bo buffer =
540 ctx_get_bo(ctx, true, ctx->dynamic_base + dynamic_state_offset);
541 if (buffer.map != NULL)
542 ctx_print_buffer(ctx, buffer, dynamic_state_length, 0, -1);
543 }
544 }
545
546 static void
handle_3dstate_vertex_buffers(struct intel_batch_decode_ctx * ctx,const uint32_t * p)547 handle_3dstate_vertex_buffers(struct intel_batch_decode_ctx *ctx,
548 const uint32_t *p)
549 {
550 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
551 struct intel_group *vbs = intel_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE");
552
553 struct intel_batch_decode_bo vb = {};
554 uint32_t vb_size = 0;
555 int index = -1;
556 int pitch = -1;
557 bool ready = false;
558
559 struct intel_field_iterator iter;
560 intel_field_iterator_init(&iter, inst, p, 0, false);
561 while (intel_field_iterator_next(&iter)) {
562 if (iter.struct_desc != vbs)
563 continue;
564
565 struct intel_field_iterator vbs_iter;
566 intel_field_iterator_init(&vbs_iter, vbs, &iter.p[iter.start_bit / 32], 0, false);
567 while (intel_field_iterator_next(&vbs_iter)) {
568 if (strcmp(vbs_iter.name, "Vertex Buffer Index") == 0) {
569 index = vbs_iter.raw_value;
570 } else if (strcmp(vbs_iter.name, "Buffer Pitch") == 0) {
571 pitch = vbs_iter.raw_value;
572 } else if (strcmp(vbs_iter.name, "Buffer Starting Address") == 0) {
573 vb = ctx_get_bo(ctx, true, vbs_iter.raw_value);
574 } else if (strcmp(vbs_iter.name, "Buffer Size") == 0) {
575 vb_size = vbs_iter.raw_value;
576 ready = true;
577 } else if (strcmp(vbs_iter.name, "End Address") == 0) {
578 if (vb.map && vbs_iter.raw_value >= vb.addr)
579 vb_size = (vbs_iter.raw_value + 1) - vb.addr;
580 else
581 vb_size = 0;
582 ready = true;
583 }
584
585 if (!ready)
586 continue;
587
588 fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size);
589
590 if (vb.map == NULL) {
591 fprintf(ctx->fp, " buffer contents unavailable\n");
592 continue;
593 }
594
595 if (vb.map == 0 || vb_size == 0)
596 continue;
597
598 if (ctx->flags & INTEL_BATCH_DECODE_VB_DATA)
599 ctx_print_buffer(ctx, vb, vb_size, pitch, ctx->max_vbo_decoded_lines);
600
601 vb.map = NULL;
602 vb_size = 0;
603 index = -1;
604 pitch = -1;
605 ready = false;
606 }
607 }
608 }
609
610 static void
handle_3dstate_index_buffer(struct intel_batch_decode_ctx * ctx,const uint32_t * p)611 handle_3dstate_index_buffer(struct intel_batch_decode_ctx *ctx,
612 const uint32_t *p)
613 {
614 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
615
616 struct intel_batch_decode_bo ib = {};
617 uint32_t ib_size = 0;
618 uint32_t format = 0;
619
620 struct intel_field_iterator iter;
621 intel_field_iterator_init(&iter, inst, p, 0, false);
622 while (intel_field_iterator_next(&iter)) {
623 if (strcmp(iter.name, "Index Format") == 0) {
624 format = iter.raw_value;
625 } else if (strcmp(iter.name, "Buffer Starting Address") == 0) {
626 ib = ctx_get_bo(ctx, true, iter.raw_value);
627 } else if (strcmp(iter.name, "Buffer Size") == 0) {
628 ib_size = iter.raw_value;
629 }
630 }
631
632 if (ib.map == NULL) {
633 fprintf(ctx->fp, " buffer contents unavailable\n");
634 return;
635 }
636
637 const void *m = ib.map;
638 const void *ib_end = ib.map + MIN2(ib.size, ib_size);
639 for (int i = 0; m < ib_end && i < 10; i++) {
640 switch (format) {
641 case 0:
642 fprintf(ctx->fp, "%3d ", *(uint8_t *)m);
643 m += 1;
644 break;
645 case 1:
646 fprintf(ctx->fp, "%3d ", *(uint16_t *)m);
647 m += 2;
648 break;
649 case 2:
650 fprintf(ctx->fp, "%3d ", *(uint32_t *)m);
651 m += 4;
652 break;
653 }
654 }
655
656 if (m < ib_end)
657 fprintf(ctx->fp, "...");
658 fprintf(ctx->fp, "\n");
659 }
660
661 static void
decode_single_ksp(struct intel_batch_decode_ctx * ctx,const uint32_t * p)662 decode_single_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
663 {
664 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
665
666 uint64_t ksp = 0;
667 bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gfx8+ only */
668 bool is_enabled = true;
669
670 struct intel_field_iterator iter;
671 intel_field_iterator_init(&iter, inst, p, 0, false);
672 while (intel_field_iterator_next(&iter)) {
673 if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
674 ksp = iter.raw_value;
675 } else if (strcmp(iter.name, "SIMD8 Dispatch Enable") == 0) {
676 is_simd8 = iter.raw_value;
677 } else if (strcmp(iter.name, "Dispatch Mode") == 0) {
678 is_simd8 = strcmp(iter.value, "SIMD8") == 0;
679 } else if (strcmp(iter.name, "Dispatch Enable") == 0) {
680 is_simd8 = strcmp(iter.value, "SIMD8") == 0;
681 } else if (strcmp(iter.name, "Enable") == 0) {
682 is_enabled = iter.raw_value;
683 }
684 }
685
686 const char *type =
687 strcmp(inst->name, "VS_STATE") == 0 ? "vertex shader" :
688 strcmp(inst->name, "GS_STATE") == 0 ? "geometry shader" :
689 strcmp(inst->name, "SF_STATE") == 0 ? "strips and fans shader" :
690 strcmp(inst->name, "CLIP_STATE") == 0 ? "clip shader" :
691 strcmp(inst->name, "3DSTATE_DS") == 0 ? "tessellation evaluation shader" :
692 strcmp(inst->name, "3DSTATE_HS") == 0 ? "tessellation control shader" :
693 strcmp(inst->name, "3DSTATE_VS") == 0 ? (is_simd8 ? "SIMD8 vertex shader" : "vec4 vertex shader") :
694 strcmp(inst->name, "3DSTATE_GS") == 0 ? (is_simd8 ? "SIMD8 geometry shader" : "vec4 geometry shader") :
695 NULL;
696 const char *short_name =
697 strcmp(inst->name, "VS_STATE") == 0 ? "VS" :
698 strcmp(inst->name, "GS_STATE") == 0 ? "GS" :
699 strcmp(inst->name, "SF_STATE") == 0 ? "SF" :
700 strcmp(inst->name, "CLIP_STATE") == 0 ? "CL" :
701 strcmp(inst->name, "3DSTATE_DS") == 0 ? "DS" :
702 strcmp(inst->name, "3DSTATE_HS") == 0 ? "HS" :
703 strcmp(inst->name, "3DSTATE_VS") == 0 ? "VS" :
704 strcmp(inst->name, "3DSTATE_GS") == 0 ? "GS" :
705 NULL;
706
707 if (is_enabled) {
708 ctx_disassemble_program(ctx, ksp, short_name, type);
709 fprintf(ctx->fp, "\n");
710 }
711 }
712
713 static void
decode_mesh_task_ksp(struct intel_batch_decode_ctx * ctx,const uint32_t * p)714 decode_mesh_task_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
715 {
716 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
717
718 uint64_t ksp = 0;
719 uint64_t local_x_maximum = 0;
720 uint64_t threads = 0;
721
722 struct intel_field_iterator iter;
723 intel_field_iterator_init(&iter, inst, p, 0, false);
724 while (intel_field_iterator_next(&iter)) {
725 if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
726 ksp = iter.raw_value;
727 } else if (strcmp(iter.name, "Local X Maximum") == 0) {
728 local_x_maximum = iter.raw_value;
729 } else if (strcmp(iter.name, "Number of Threads in GPGPU Thread Group") == 0) {
730 threads = iter.raw_value;
731 }
732 }
733
734 const char *type =
735 strcmp(inst->name, "3DSTATE_MESH_SHADER") == 0 ? "mesh shader" :
736 strcmp(inst->name, "3DSTATE_TASK_SHADER") == 0 ? "task shader" :
737 NULL;
738 const char *short_name =
739 strcmp(inst->name, "3DSTATE_MESH_SHADER") == 0 ? "MS" :
740 strcmp(inst->name, "3DSTATE_TASK_SHADER") == 0 ? "TS" :
741 NULL;
742
743 if (threads && local_x_maximum) {
744 ctx_disassemble_program(ctx, ksp, short_name, type);
745 fprintf(ctx->fp, "\n");
746 }
747 }
748
/* Disassemble the pixel shader kernels of a PS packet (pre-Xe2 layout).
 *
 * Up to three kernel start pointers ("Kernel Start Pointer 0/1/2") and
 * three per-width dispatch enables may be present; the KSPs are reordered
 * into [SIMD8, SIMD16, SIMD32] before disassembly.
 */
static void
decode_ps_kern(struct intel_batch_decode_ctx *ctx,
               struct intel_group *inst, const uint32_t *p)
{
   /* Gfx4 packets carry a single KSP shared by all dispatch widths. */
   bool single_ksp = ctx->devinfo.ver == 4;
   uint64_t ksp[3] = {0, 0, 0};
   bool enabled[3] = {false, false, false};

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (strncmp(iter.name, "Kernel Start Pointer ",
                  strlen("Kernel Start Pointer ")) == 0) {
         /* The trailing character of the field name selects KSP 0/1/2. */
         int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
         ksp[idx] = strtol(iter.value, NULL, 16);
      } else if (strcmp(iter.name, "8 Pixel Dispatch Enable") == 0) {
         enabled[0] = strcmp(iter.value, "true") == 0;
      } else if (strcmp(iter.name, "16 Pixel Dispatch Enable") == 0) {
         enabled[1] = strcmp(iter.value, "true") == 0;
      } else if (strcmp(iter.name, "32 Pixel Dispatch Enable") == 0) {
         enabled[2] = strcmp(iter.value, "true") == 0;
      }
   }

   if (single_ksp)
      ksp[1] = ksp[2] = ksp[0];

   /* Reorder KSPs to be [8, 16, 32] instead of the hardware order. */
   if (enabled[0] + enabled[1] + enabled[2] == 1) {
      /* NOTE(review): with exactly one width enabled, KSP 0 appears to be
       * used regardless of which width it is — confirm against the PRM.
       */
      if (enabled[1]) {
         ksp[1] = ksp[0];
         ksp[0] = 0;
      } else if (enabled[2]) {
         ksp[2] = ksp[0];
         ksp[0] = 0;
      }
   } else {
      /* With multiple widths enabled, KSP 1 and 2 are swapped relative to
       * the [8, 16, 32] order.
       */
      uint64_t tmp = ksp[1];
      ksp[1] = ksp[2];
      ksp[2] = tmp;
   }

   if (enabled[0])
      ctx_disassemble_program(ctx, ksp[0], "FS8", "SIMD8 fragment shader");
   if (enabled[1])
      ctx_disassemble_program(ctx, ksp[1], "FS16", "SIMD16 fragment shader");
   if (enabled[2])
      ctx_disassemble_program(ctx, ksp[2], "FS32", "SIMD32 fragment shader");

   if (enabled[0] || enabled[1] || enabled[2])
      fprintf(ctx->fp, "\n");
}
801
802 static void
decode_ps_kern_xe2(struct intel_batch_decode_ctx * ctx,struct intel_group * inst,const uint32_t * p)803 decode_ps_kern_xe2(struct intel_batch_decode_ctx *ctx,
804 struct intel_group *inst, const uint32_t *p)
805 {
806 uint64_t ksp[2] = {0, 0};
807 bool enabled[2] = {false, false};
808 int width[2] = {0, 0};
809
810 struct intel_field_iterator iter;
811 intel_field_iterator_init(&iter, inst, p, 0, false);
812 while (intel_field_iterator_next(&iter)) {
813 if (strncmp(iter.name, "Kernel Start Pointer ",
814 strlen("Kernel Start Pointer ")) == 0) {
815 int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
816 ksp[idx] = strtol(iter.value, NULL, 16);
817 } else if (strcmp(iter.name, "Kernel 0 Enable") == 0) {
818 enabled[0] = strcmp(iter.value, "true") == 0;
819 } else if (strcmp(iter.name, "Kernel 1 Enable") == 0) {
820 enabled[1] = strcmp(iter.value, "true") == 0;
821 } else if (strcmp(iter.name, "Kernel[0] : SIMD Width") == 0) {
822 width[0] = strncmp(iter.value, "0 ", 2) == 0 ? 16 : 32;
823 } else if (strcmp(iter.name, "Kernel[1] : SIMD Width") == 0) {
824 width[1] = strncmp(iter.value, "0 ", 2) == 0 ? 16 : 32;
825 }
826 }
827
828 for (int i = 0; i < 2; i++) {
829 if (enabled[i])
830 ctx_disassemble_program(ctx, ksp[i], "FS",
831 width[i] == 16 ?
832 "SIMD16 fragment shader" :
833 "SIMD32 fragment shader");
834 }
835
836 if (enabled[0] || enabled[1])
837 fprintf(ctx->fp, "\n");
838 }
839
840 static void
decode_ps_kernels(struct intel_batch_decode_ctx * ctx,const uint32_t * p)841 decode_ps_kernels(struct intel_batch_decode_ctx *ctx,
842 const uint32_t *p)
843 {
844 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
845 if (ctx->devinfo.ver >= 20)
846 decode_ps_kern_xe2(ctx, inst, p);
847 else
848 decode_ps_kern(ctx, inst, p);
849 }
850
851 static void
decode_3dstate_constant_all(struct intel_batch_decode_ctx * ctx,const uint32_t * p)852 decode_3dstate_constant_all(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
853 {
854 struct intel_group *inst =
855 intel_spec_find_instruction(ctx->spec, ctx->engine, p);
856 struct intel_group *body =
857 intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_ALL_DATA");
858
859 uint32_t read_length[4] = {0};
860 struct intel_batch_decode_bo buffer[4];
861 memset(buffer, 0, sizeof(buffer));
862
863 struct intel_field_iterator outer;
864 intel_field_iterator_init(&outer, inst, p, 0, false);
865 int idx = 0;
866 while (intel_field_iterator_next(&outer)) {
867 if (outer.struct_desc != body)
868 continue;
869
870 struct intel_field_iterator iter;
871 intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
872 0, false);
873 while (intel_field_iterator_next(&iter)) {
874 if (!strcmp(iter.name, "Pointer To Constant Buffer")) {
875 buffer[idx] = ctx_get_bo(ctx, true, iter.raw_value);
876 } else if (!strcmp(iter.name, "Constant Buffer Read Length")) {
877 read_length[idx] = iter.raw_value;
878 }
879 }
880 idx++;
881 }
882
883 for (int i = 0; i < 4; i++) {
884 if (read_length[i] == 0 || buffer[i].map == NULL)
885 continue;
886
887 unsigned size = read_length[i] * 32;
888 fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
889
890 ctx_print_buffer(ctx, buffer[i], size, 0, -1);
891 }
892 }
893
894 static void
decode_3dstate_constant(struct intel_batch_decode_ctx * ctx,const uint32_t * p)895 decode_3dstate_constant(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
896 {
897 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
898 struct intel_group *body =
899 intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");
900
901 uint32_t read_length[4] = {0};
902 uint64_t read_addr[4] = {0};
903
904 struct intel_field_iterator outer;
905 intel_field_iterator_init(&outer, inst, p, 0, false);
906 while (intel_field_iterator_next(&outer)) {
907 if (outer.struct_desc != body)
908 continue;
909
910 struct intel_field_iterator iter;
911 intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
912 0, false);
913
914 while (intel_field_iterator_next(&iter)) {
915 int idx;
916 if (sscanf(iter.name, "Read Length[%d]", &idx) == 1) {
917 read_length[idx] = iter.raw_value;
918 } else if (sscanf(iter.name, "Buffer[%d]", &idx) == 1) {
919 read_addr[idx] = iter.raw_value;
920 }
921 }
922
923 for (int i = 0; i < 4; i++) {
924 if (read_length[i] == 0)
925 continue;
926
927 struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr[i]);
928 if (!buffer.map) {
929 fprintf(ctx->fp, "constant buffer %d unavailable\n", i);
930 continue;
931 }
932
933 unsigned size = read_length[i] * 32;
934 fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
935
936 ctx_print_buffer(ctx, buffer, size, 0, -1);
937 }
938 }
939 }
940
941 static void
decode_gfx4_constant_buffer(struct intel_batch_decode_ctx * ctx,const uint32_t * p)942 decode_gfx4_constant_buffer(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
943 {
944 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
945 uint64_t read_length = 0, read_addr = 0, valid = 0;
946 struct intel_field_iterator iter;
947 intel_field_iterator_init(&iter, inst, p, 0, false);
948
949 while (intel_field_iterator_next(&iter)) {
950 if (!strcmp(iter.name, "Buffer Length")) {
951 read_length = iter.raw_value;
952 } else if (!strcmp(iter.name, "Valid")) {
953 valid = iter.raw_value;
954 } else if (!strcmp(iter.name, "Buffer Starting Address")) {
955 read_addr = iter.raw_value;
956 }
957 }
958
959 if (!valid)
960 return;
961
962 struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr);
963 if (!buffer.map) {
964 fprintf(ctx->fp, "constant buffer unavailable\n");
965 return;
966 }
967 unsigned size = (read_length + 1) * 16 * sizeof(float);
968 fprintf(ctx->fp, "constant buffer size %u\n", size);
969
970 ctx_print_buffer(ctx, buffer, size, 0, -1);
971 }
972
973
974 static void
decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx * ctx,const uint32_t * p)975 decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
976 const uint32_t *p)
977 {
978 fprintf(ctx->fp, "VS Binding Table:\n");
979 dump_binding_table(ctx, p[1], -1);
980
981 fprintf(ctx->fp, "GS Binding Table:\n");
982 dump_binding_table(ctx, p[2], -1);
983
984 if (ctx->devinfo.ver < 6) {
985 fprintf(ctx->fp, "CLIP Binding Table:\n");
986 dump_binding_table(ctx, p[3], -1);
987 fprintf(ctx->fp, "SF Binding Table:\n");
988 dump_binding_table(ctx, p[4], -1);
989 fprintf(ctx->fp, "PS Binding Table:\n");
990 dump_binding_table(ctx, p[5], -1);
991 } else {
992 fprintf(ctx->fp, "PS Binding Table:\n");
993 dump_binding_table(ctx, p[3], -1);
994 }
995 }
996
static void
decode_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   /* Per-stage 3DSTATE_BINDING_TABLE_POINTERS_xS packet: DWord 1 holds the
    * stage's binding table offset.  Dump all entries (-1 = count unknown).
    */
   dump_binding_table(ctx, p[1], -1);
}
1003
static void
decode_3dstate_sampler_state_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   /* Per-stage 3DSTATE_SAMPLER_STATE_POINTERS_xS packet: DWord 1 holds the
    * sampler state offset; dump a single sampler entry.
    */
   dump_samplers(ctx, p[1], 1);
}
1010
static void
decode_3dstate_sampler_state_pointers_gfx6(struct intel_batch_decode_ctx *ctx,
                                           const uint32_t *p)
{
   /* Gfx6 3DSTATE_SAMPLER_STATE_POINTERS packs three stage pointers into
    * DWords 1-3; dump one sampler entry for each.
    */
   for (int dw = 1; dw <= 3; dw++)
      dump_samplers(ctx, p[dw], 1);
}
1019
/* Return true iff 'str' ends with the suffix 'end'.
 *
 * Uses size_t arithmetic with an explicit underflow guard instead of
 * subtracting strlen() results into an int, which wraps in size_t and then
 * narrows implementation-defined for very long strings.
 */
static bool
str_ends_with(const char *str, const char *end)
{
   const size_t str_len = strlen(str);
   const size_t end_len = strlen(end);

   if (end_len > str_len)
      return false;

   return strcmp(str + (str_len - end_len), end) == 0;
}
1029
1030 static void
decode_dynamic_state(struct intel_batch_decode_ctx * ctx,const char * struct_type,uint32_t state_offset,int count)1031 decode_dynamic_state(struct intel_batch_decode_ctx *ctx,
1032 const char *struct_type, uint32_t state_offset,
1033 int count)
1034 {
1035 uint64_t state_addr = ctx->dynamic_base + state_offset;
1036 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
1037 const void *state_map = bo.map;
1038
1039 if (state_map == NULL) {
1040 fprintf(ctx->fp, " dynamic %s state unavailable\n", struct_type);
1041 return;
1042 }
1043
1044 struct intel_group *state = intel_spec_find_struct(ctx->spec, struct_type);
1045 if (strcmp(struct_type, "BLEND_STATE") == 0) {
1046 /* Blend states are different from the others because they have a header
1047 * struct called BLEND_STATE which is followed by a variable number of
1048 * BLEND_STATE_ENTRY structs.
1049 */
1050 fprintf(ctx->fp, "%s\n", struct_type);
1051 ctx_print_group(ctx, state, state_addr, state_map);
1052
1053 state_addr += state->dw_length * 4;
1054 state_map += state->dw_length * 4;
1055
1056 struct_type = "BLEND_STATE_ENTRY";
1057 state = intel_spec_find_struct(ctx->spec, struct_type);
1058 }
1059
1060 count = update_count(ctx, ctx->dynamic_base + state_offset,
1061 ctx->dynamic_base, state->dw_length, count);
1062
1063 for (int i = 0; i < count; i++) {
1064 fprintf(ctx->fp, "%s %d\n", struct_type, i);
1065 ctx_print_group(ctx, state, state_addr, state_map);
1066
1067 state_addr += state->dw_length * 4;
1068 state_map += state->dw_length * 4;
1069 }
1070 }
1071
1072 static void
decode_dynamic_state_pointers(struct intel_batch_decode_ctx * ctx,const char * struct_type,const uint32_t * p,int count)1073 decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx,
1074 const char *struct_type, const uint32_t *p,
1075 int count)
1076 {
1077 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1078
1079 uint32_t state_offset = 0;
1080
1081 struct intel_field_iterator iter;
1082 intel_field_iterator_init(&iter, inst, p, 0, false);
1083 while (intel_field_iterator_next(&iter)) {
1084 if (str_ends_with(iter.name, "Pointer") || !strncmp(iter.name, "Pointer", 7)) {
1085 state_offset = iter.raw_value;
1086 break;
1087 }
1088 }
1089 decode_dynamic_state(ctx, struct_type, state_offset, count);
1090 }
1091
static void
decode_3dstate_viewport_state_pointers(struct intel_batch_decode_ctx *ctx,
                                       const uint32_t *p)
{
   /* Gfx4-5 3DSTATE_VIEWPORT_STATE_POINTERS: per-viewport "Change" bits
    * precede the corresponding pointer fields in iteration order, so each
    * flag is latched before its pointer is seen.
    *
    * NOTE: only the CC test below starts an else-if chain; the CLIP and SF
    * tests are independent ifs.  The pointer branches therefore only run on
    * fields that are not one of the three "Change" flags — intentional,
    * since field names are mutually exclusive.
    */
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
   uint32_t state_offset = 0;
   bool clip = false, sf = false, cc = false;
   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (!strcmp(iter.name, "CLIP Viewport State Change"))
         clip = iter.raw_value;
      if (!strcmp(iter.name, "SF Viewport State Change"))
         sf = iter.raw_value;
      if (!strcmp(iter.name, "CC Viewport State Change"))
         cc = iter.raw_value;
      else if (!strcmp(iter.name, "Pointer to CLIP_VIEWPORT") && clip) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "CLIP_VIEWPORT", state_offset, 1);
      }
      else if (!strcmp(iter.name, "Pointer to SF_VIEWPORT") && sf) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "SF_VIEWPORT", state_offset, 1);
      }
      else if (!strcmp(iter.name, "Pointer to CC_VIEWPORT") && cc) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "CC_VIEWPORT", state_offset, 1);
      }
   }
}
1122
static void
decode_3dstate_viewport_state_pointers_cc(struct intel_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   /* Decode 4 CC_VIEWPORT structs at the packet's pointer field. */
   decode_dynamic_state_pointers(ctx, "CC_VIEWPORT", p, 4);
}
1129
static void
decode_3dstate_viewport_state_pointers_sf_clip(struct intel_batch_decode_ctx *ctx,
                                               const uint32_t *p)
{
   /* Decode 4 combined SF_CLIP_VIEWPORT structs (gfx7+) at the packet's
    * pointer field.
    */
   decode_dynamic_state_pointers(ctx, "SF_CLIP_VIEWPORT", p, 4);
}
1136
static void
decode_3dstate_blend_state_pointers(struct intel_batch_decode_ctx *ctx,
                                    const uint32_t *p)
{
   /* Decode the BLEND_STATE header plus 8 entries (one per render target);
    * decode_dynamic_state() handles the header/entry split.
    */
   decode_dynamic_state_pointers(ctx, "BLEND_STATE", p, 8);
}
1143
static void
decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx,
                                 const uint32_t *p)
{
   /* On everything except gfx6, 3DSTATE_CC_STATE_POINTERS carries a single
    * COLOR_CALC_STATE pointer.  Gfx6 packs blend, depth-stencil and color
    * calc pointers (each with its own "valid/change" bit) into one packet,
    * so walk the fields instead.
    */
   if (ctx->devinfo.ver != 6) {
      decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1);
      return;
   }

   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);

   uint32_t state_offset = 0;
   bool blend_change = false, ds_change = false, cc_change = false;
   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   /* The change/valid bits iterate before their pointer fields, so each
    * flag is latched before the matching pointer branch can run.
    */
   while (intel_field_iterator_next(&iter)) {
      if (!strcmp(iter.name, "BLEND_STATE Change"))
         blend_change = iter.raw_value;
      else if (!strcmp(iter.name, "DEPTH_STENCIL_STATE Change"))
         ds_change = iter.raw_value;
      else if (!strcmp(iter.name, "Color Calc State Pointer Valid"))
         cc_change = iter.raw_value;
      else if (!strcmp(iter.name, "Pointer to DEPTH_STENCIL_STATE") && ds_change) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "DEPTH_STENCIL_STATE", state_offset, 1);
      }
      else if (!strcmp(iter.name, "Pointer to BLEND_STATE") && blend_change) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "BLEND_STATE", state_offset, 1);
      }
      else if (!strcmp(iter.name, "Color Calc State Pointer") && cc_change) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "COLOR_CALC_STATE", state_offset, 1);
      }
   }
}
1180
static void
decode_3dstate_ds_state_pointers(struct intel_batch_decode_ctx *ctx,
                                 const uint32_t *p)
{
   /* Decode the single DEPTH_STENCIL_STATE struct at the packet's pointer. */
   decode_dynamic_state_pointers(ctx, "DEPTH_STENCIL_STATE", p, 1);
}
1187
static void
decode_3dstate_scissor_state_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   /* Decode the single SCISSOR_RECT struct at the packet's pointer. */
   decode_dynamic_state_pointers(ctx, "SCISSOR_RECT", p, 1);
}
1194
static void
decode_3dstate_slice_table_state_pointers(struct intel_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   /* Decode the single SLICE_HASH_TABLE struct at the packet's pointer. */
   decode_dynamic_state_pointers(ctx, "SLICE_HASH_TABLE", p, 1);
}
1201
1202 static void
handle_gt_mode(struct intel_batch_decode_ctx * ctx,uint32_t reg_addr,uint32_t val)1203 handle_gt_mode(struct intel_batch_decode_ctx *ctx,
1204 uint32_t reg_addr, uint32_t val)
1205 {
1206 struct intel_group *reg = intel_spec_find_register(ctx->spec, reg_addr);
1207
1208 assert(intel_group_get_length(reg, &val) == 1);
1209
1210 struct intel_field_iterator iter;
1211 intel_field_iterator_init(&iter, reg, &val, 0, false);
1212
1213 uint32_t bt_alignment;
1214 bool bt_alignment_mask = 0;
1215
1216 while (intel_field_iterator_next(&iter)) {
1217 if (strcmp(iter.name, "Binding Table Alignment") == 0) {
1218 bt_alignment = iter.raw_value;
1219 } else if (strcmp(iter.name, "Binding Table Alignment Mask") == 0) {
1220 bt_alignment_mask = iter.raw_value;
1221 }
1222 }
1223
1224 if (bt_alignment_mask)
1225 ctx->use_256B_binding_tables = bt_alignment;
1226 }
1227
1228 struct reg_handler {
1229 const char *name;
1230 void (*handler)(struct intel_batch_decode_ctx *ctx,
1231 uint32_t reg_addr, uint32_t val);
1232 } reg_handlers[] = {
1233 { "GT_MODE", handle_gt_mode }
1234 };
1235
1236 static void
decode_load_register_imm(struct intel_batch_decode_ctx * ctx,const uint32_t * p)1237 decode_load_register_imm(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1238 {
1239 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1240 const unsigned length = intel_group_get_length(inst, p);
1241 assert(length & 1);
1242 const unsigned nr_regs = (length - 1) / 2;
1243
1244 for (unsigned i = 0; i < nr_regs; i++) {
1245 struct intel_group *reg = intel_spec_find_register(ctx->spec, p[i * 2 + 1]);
1246 if (reg != NULL) {
1247 fprintf(ctx->fp, "register %s (0x%x): 0x%x\n",
1248 reg->name, reg->register_offset, p[2]);
1249 ctx_print_group(ctx, reg, reg->register_offset, &p[2]);
1250
1251 for (unsigned i = 0; i < ARRAY_SIZE(reg_handlers); i++) {
1252 if (strcmp(reg->name, reg_handlers[i].name) == 0)
1253 reg_handlers[i].handler(ctx, p[1], p[2]);
1254 }
1255 }
1256 }
1257 }
1258
1259 static void
disasm_program_from_group(struct intel_batch_decode_ctx * ctx,struct intel_group * strct,const void * map,const char * short_name,const char * type)1260 disasm_program_from_group(struct intel_batch_decode_ctx *ctx,
1261 struct intel_group *strct, const void *map,
1262 const char *short_name, const char *type)
1263 {
1264 uint64_t ksp = 0;
1265 bool is_enabled = true;
1266 struct intel_field_iterator iter;
1267
1268 intel_field_iterator_init(&iter, strct, map, 0, false);
1269
1270 while (intel_field_iterator_next(&iter)) {
1271 if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
1272 ksp = iter.raw_value;
1273 } else if (strcmp(iter.name, "Enable") == 0) {
1274 is_enabled = iter.raw_value;
1275 }
1276 }
1277
1278 if (is_enabled) {
1279 ctx_disassemble_program(ctx, ksp, short_name, type);
1280 fprintf(ctx->fp, "\n");
1281 }
1282 }
1283
1284 static void
decode_vs_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1285 decode_vs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1286 {
1287 struct intel_group *strct =
1288 intel_spec_find_struct(ctx->spec, "VS_STATE");
1289 if (strct == NULL) {
1290 fprintf(ctx->fp, "did not find VS_STATE info\n");
1291 return;
1292 }
1293
1294 struct intel_batch_decode_bo bind_bo =
1295 ctx_get_bo(ctx, true, offset);
1296
1297 if (bind_bo.map == NULL) {
1298 fprintf(ctx->fp, " vs state unavailable\n");
1299 return;
1300 }
1301
1302 ctx_print_group(ctx, strct, offset, bind_bo.map);
1303 disasm_program_from_group(ctx, strct, bind_bo.map, "VS", "vertex shader");
1304 }
1305
1306 static void
decode_gs_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1307 decode_gs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1308 {
1309 struct intel_group *strct =
1310 intel_spec_find_struct(ctx->spec, "GS_STATE");
1311 if (strct == NULL) {
1312 fprintf(ctx->fp, "did not find GS_STATE info\n");
1313 return;
1314 }
1315
1316 struct intel_batch_decode_bo bind_bo =
1317 ctx_get_bo(ctx, true, offset);
1318
1319 if (bind_bo.map == NULL) {
1320 fprintf(ctx->fp, " gs state unavailable\n");
1321 return;
1322 }
1323
1324 ctx_print_group(ctx, strct, offset, bind_bo.map);
1325 disasm_program_from_group(ctx, strct, bind_bo.map, "GS", "geometry shader");
1326 }
1327
static void
decode_clip_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
{
   /* Print the CLIP_STATE struct at 'offset', disassemble its clip shader,
    * then dump the CLIP_VIEWPORT it references.
    */
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "CLIP_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find CLIP_STATE info\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, " clip state unavailable\n");
      return;
   }

   ctx_print_group(ctx, strct, offset, bind_bo.map);
   disasm_program_from_group(ctx, strct, bind_bo.map, "CL", "clip shader");

   struct intel_group *vp_strct =
      intel_spec_find_struct(ctx->spec, "CLIP_VIEWPORT");
   if (vp_strct == NULL) {
      fprintf(ctx->fp, "did not find CLIP_VIEWPORT info\n");
      return;
   }
   /* Viewport pointer lives in DWord 6 of CLIP_STATE; the low 2 bits are
    * masked off (presumably flag bits — confirm against the PRM).
    */
   uint32_t clip_vp_offset = ((uint32_t *)bind_bo.map)[6] & ~0x3;
   struct intel_batch_decode_bo vp_bo =
      ctx_get_bo(ctx, true, clip_vp_offset);
   if (vp_bo.map == NULL) {
      fprintf(ctx->fp, " clip vp state unavailable\n");
      return;
   }
   ctx_print_group(ctx, vp_strct, clip_vp_offset, vp_bo.map);
}
1364
static void
decode_sf_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
{
   /* Print the SF_STATE struct at 'offset', disassemble its SF shader,
    * then dump the SF_VIEWPORT it references.
    */
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "SF_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find SF_STATE info\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, " sf state unavailable\n");
      return;
   }

   ctx_print_group(ctx, strct, offset, bind_bo.map);
   disasm_program_from_group(ctx, strct, bind_bo.map, "SF", "strips and fans shader");

   struct intel_group *vp_strct =
      intel_spec_find_struct(ctx->spec, "SF_VIEWPORT");
   if (vp_strct == NULL) {
      fprintf(ctx->fp, "did not find SF_VIEWPORT info\n");
      return;
   }

   /* Viewport pointer lives in DWord 5 of SF_STATE; low 2 bits masked off
    * (presumably flag bits — confirm against the PRM).
    */
   uint32_t sf_vp_offset = ((uint32_t *)bind_bo.map)[5] & ~0x3;
   struct intel_batch_decode_bo vp_bo =
      ctx_get_bo(ctx, true, sf_vp_offset);
   if (vp_bo.map == NULL) {
      fprintf(ctx->fp, " sf vp state unavailable\n");
      return;
   }
   ctx_print_group(ctx, vp_strct, sf_vp_offset, vp_bo.map);
}
1402
1403 static void
decode_wm_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1404 decode_wm_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1405 {
1406 struct intel_group *strct =
1407 intel_spec_find_struct(ctx->spec, "WM_STATE");
1408 if (strct == NULL) {
1409 fprintf(ctx->fp, "did not find WM_STATE info\n");
1410 return;
1411 }
1412
1413 struct intel_batch_decode_bo bind_bo =
1414 ctx_get_bo(ctx, true, offset);
1415
1416 if (bind_bo.map == NULL) {
1417 fprintf(ctx->fp, " wm state unavailable\n");
1418 return;
1419 }
1420
1421 ctx_print_group(ctx, strct, offset, bind_bo.map);
1422
1423 decode_ps_kern(ctx, strct, bind_bo.map);
1424 }
1425
static void
decode_cc_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
{
   /* Print the COLOR_CALC_STATE struct at 'offset', then dump the
    * CC_VIEWPORT it references.
    */
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "COLOR_CALC_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find COLOR_CALC_STATE info\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, " cc state unavailable\n");
      return;
   }

   ctx_print_group(ctx, strct, offset, bind_bo.map);

   struct intel_group *vp_strct =
      intel_spec_find_struct(ctx->spec, "CC_VIEWPORT");
   if (vp_strct == NULL) {
      fprintf(ctx->fp, "did not find CC_VIEWPORT info\n");
      return;
   }
   /* Viewport pointer lives in DWord 4 of COLOR_CALC_STATE; low 2 bits
    * masked off (presumably flag bits — confirm against the PRM).
    */
   uint32_t cc_vp_offset = ((uint32_t *)bind_bo.map)[4] & ~0x3;
   struct intel_batch_decode_bo vp_bo =
      ctx_get_bo(ctx, true, cc_vp_offset);
   if (vp_bo.map == NULL) {
      fprintf(ctx->fp, " cc vp state unavailable\n");
      return;
   }
   ctx_print_group(ctx, vp_strct, cc_vp_offset, vp_bo.map);
}
static void
decode_pipelined_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   /* Gfx4-5 3DSTATE_PIPELINED_POINTERS: DWords 1..6 point at the per-stage
    * state tables (VS, GS, CLIP, SF, WM, CC).  Bit 0 of p[2] gates the GS
    * decode and is masked out of the pointers where used (presumably an
    * enable bit — confirm against the PRM).
    */
   fprintf(ctx->fp, "VS State Table:\n");
   decode_vs_state(ctx, p[1]);
   if (p[2] & 1) {
      fprintf(ctx->fp, "GS State Table:\n");
      decode_gs_state(ctx, p[2] & ~1);
   }
   fprintf(ctx->fp, "Clip State Table:\n");
   decode_clip_state(ctx, p[3] & ~1);
   fprintf(ctx->fp, "SF State Table:\n");
   decode_sf_state(ctx, p[4]);
   fprintf(ctx->fp, "WM State Table:\n");
   decode_wm_state(ctx, p[5]);
   fprintf(ctx->fp, "CC State Table:\n");
   decode_cc_state(ctx, p[6]);
}
1479
static void
decode_cps_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   /* Decode the single CPS_STATE struct at the packet's pointer. */
   decode_dynamic_state_pointers(ctx, "CPS_STATE", p, 1);
}
1485
/* Maps a command name to a callback that decodes the packet's payload
 * beyond the generic field dump.
 */
struct custom_decoder {
   const char *cmd_name;
   void (*decode)(struct intel_batch_decode_ctx *ctx, const uint32_t *p);
};
1490
1491 /* Special handling to be able to decode other instructions */
1492 struct custom_decoder state_handlers[] = {
1493 { "STATE_BASE_ADDRESS", handle_state_base_address },
1494 { "3DSTATE_BINDING_TABLE_POOL_ALLOC", handle_binding_table_pool_alloc },
1495 { "MEDIA_INTERFACE_DESCRIPTOR_LOAD", handle_media_interface_descriptor_load },
1496 };
1497
1498 /* Special printing of instructions */
1499 struct custom_decoder custom_decoders[] = {
1500 { "COMPUTE_WALKER", handle_compute_walker },
1501 { "MEDIA_CURBE_LOAD", handle_media_curbe_load },
1502 { "3DSTATE_VERTEX_BUFFERS", handle_3dstate_vertex_buffers },
1503 { "3DSTATE_INDEX_BUFFER", handle_3dstate_index_buffer },
1504 { "3DSTATE_VS", decode_single_ksp },
1505 { "3DSTATE_GS", decode_single_ksp },
1506 { "3DSTATE_DS", decode_single_ksp },
1507 { "3DSTATE_HS", decode_single_ksp },
1508 { "3DSTATE_PS", decode_ps_kernels },
1509 { "3DSTATE_WM", decode_ps_kernels },
1510 { "3DSTATE_MESH_SHADER", decode_mesh_task_ksp },
1511 { "3DSTATE_TASK_SHADER", decode_mesh_task_ksp },
1512 { "3DSTATE_CONSTANT_VS", decode_3dstate_constant },
1513 { "3DSTATE_CONSTANT_GS", decode_3dstate_constant },
1514 { "3DSTATE_CONSTANT_PS", decode_3dstate_constant },
1515 { "3DSTATE_CONSTANT_HS", decode_3dstate_constant },
1516 { "3DSTATE_CONSTANT_DS", decode_3dstate_constant },
1517 { "3DSTATE_CONSTANT_ALL", decode_3dstate_constant_all },
1518
1519 { "3DSTATE_BINDING_TABLE_POINTERS", decode_gfx4_3dstate_binding_table_pointers },
1520 { "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers },
1521 { "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers },
1522 { "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers },
1523 { "3DSTATE_BINDING_TABLE_POINTERS_GS", decode_3dstate_binding_table_pointers },
1524 { "3DSTATE_BINDING_TABLE_POINTERS_PS", decode_3dstate_binding_table_pointers },
1525
1526 { "3DSTATE_SAMPLER_STATE_POINTERS_VS", decode_3dstate_sampler_state_pointers },
1527 { "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers },
1528 { "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers },
1529 { "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers },
1530 { "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers },
1531 { "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gfx6 },
1532
1533 { "3DSTATE_VIEWPORT_STATE_POINTERS", decode_3dstate_viewport_state_pointers },
1534 { "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc },
1535 { "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
1536 { "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers },
1537 { "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers },
1538 { "3DSTATE_DEPTH_STENCIL_STATE_POINTERS", decode_3dstate_ds_state_pointers },
1539 { "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers },
1540 { "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers },
1541 { "MI_LOAD_REGISTER_IMM", decode_load_register_imm },
1542 { "3DSTATE_PIPELINED_POINTERS", decode_pipelined_pointers },
1543 { "3DSTATE_CPS_POINTERS", decode_cps_pointers },
1544 { "CONSTANT_BUFFER", decode_gfx4_constant_buffer },
1545 };
1546
1547 static void
get_inst_color(const struct intel_batch_decode_ctx * ctx,const struct intel_group * inst,char ** const out_color,char ** const out_reset_color)1548 get_inst_color(const struct intel_batch_decode_ctx *ctx,
1549 const struct intel_group *inst,
1550 char **const out_color,
1551 char **const out_reset_color)
1552 {
1553 const char *inst_name = intel_group_get_name(inst);
1554 if (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) {
1555 *out_reset_color = NORMAL;
1556 if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
1557 if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 ||
1558 strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0)
1559 *out_color = GREEN_HEADER;
1560 else
1561 *out_color = BLUE_HEADER;
1562 } else {
1563 *out_color = NORMAL;
1564 }
1565 } else {
1566 *out_color = "";
1567 *out_reset_color = "";
1568 }
1569 }
1570
/* Pairs an instruction description with the batch pointer of its most
 * recently seen occurrence; used to sort accumulated commands by name.
 */
struct inst_ptr {
   struct intel_group *inst;
   uint32_t *ptr;
};
1575
/* qsort comparator: order struct inst_ptr entries by command name. */
static int
compare_inst_ptr(const void *v1, const void *v2)
{
   const struct inst_ptr *i1 = v1, *i2 = v2;
   return strcmp(i1->inst->name, i2->inst->name);
}
1582
/* Print every command accumulated in ctx->commands (the latest occurrence
 * of each, as tracked by intel_print_batch() in ACCUMULATE mode), sorted by
 * name, with full field/custom decoding when INTEL_BATCH_DECODE_FULL is set.
 */
static void
intel_print_accumulated_instrs(struct intel_batch_decode_ctx *ctx)
{
   struct util_dynarray arr;
   util_dynarray_init(&arr, NULL);

   /* Flatten the hash table into an array so it can be sorted. */
   hash_table_foreach(ctx->commands, entry) {
      struct inst_ptr inst = {
         .inst = (struct intel_group *)entry->key,
         .ptr = entry->data,
      };
      util_dynarray_append(&arr, struct inst_ptr, inst);
   }
   qsort(util_dynarray_begin(&arr),
         util_dynarray_num_elements(&arr, struct inst_ptr),
         sizeof(struct inst_ptr),
         compare_inst_ptr);

   fprintf(ctx->fp, "----\n");
   util_dynarray_foreach(&arr, struct inst_ptr, i) {
      char *begin_color;
      char *end_color;
      get_inst_color(ctx, i->inst, &begin_color, &end_color);

      /* No meaningful batch offset in accumulate mode; always print 0. */
      uint64_t offset = 0;
      fprintf(ctx->fp, "%s0x%08"PRIx64": 0x%08x: %-80s%s\n",
              begin_color, offset, i->ptr[0], i->inst->name, end_color);
      if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
         ctx_print_group(ctx, i->inst, 0, i->ptr);
         for (int d = 0; d < ARRAY_SIZE(custom_decoders); d++) {
            if (strcmp(i->inst->name, custom_decoders[d].cmd_name) == 0) {
               custom_decoders[d].decode(ctx, i->ptr);
               break;
            }
         }
      }
   }
   util_dynarray_fini(&arr);
}
1622
/* Print one decoded instruction header line (offset, header DWord, name),
 * marking the hardware's last-executed position (ACTHD) when known, then
 * the full field dump and any custom decoder if FULL decoding is enabled.
 */
static void
print_instr(struct intel_batch_decode_ctx *ctx,
            const struct intel_group *inst,
            const uint32_t *p,
            uint64_t offset)
{
   char *begin_color;
   char *end_color;
   get_inst_color(ctx, inst, &begin_color, &end_color);

   fprintf(ctx->fp, "%s0x%08"PRIx64"%s: 0x%08x: %-80s%s\n",
           begin_color, offset,
           ctx->acthd && offset == ctx->acthd ? " (ACTHD)" : "", p[0],
           inst->name, end_color);

   if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
      ctx_print_group(ctx, inst, offset, p);

      for (int i = 0; i < ARRAY_SIZE(custom_decoders); i++) {
         if (strcmp(inst->name, custom_decoders[i].cmd_name) == 0) {
            custom_decoders[i].decode(ctx, p);
            break;
         }
      }
   }
}
1649
/* Decode and print an entire batch buffer, following MI_BATCH_BUFFER_START
 * chains recursively (bounded to 100 levels to survive cyclic batches).
 *
 * 'from_ring' indicates the batch came from the ring buffer, where a
 * first-level MI_BATCH_BUFFER_START returns to the caller rather than
 * acting as a goto.
 */
void
intel_print_batch(struct intel_batch_decode_ctx *ctx,
                  const uint32_t *batch, uint32_t batch_size,
                  uint64_t batch_addr, bool from_ring)
{
   const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
   int length;
   struct intel_group *inst;
   const char *reset_color = ctx->flags & INTEL_BATCH_DECODE_IN_COLOR ? NORMAL : "";

   /* Guard against runaway/cyclic batch chains. */
   if (ctx->n_batch_buffer_start >= 100) {
      fprintf(ctx->fp, "%s0x%08"PRIx64": Max batch buffer jumps exceeded%s\n",
              (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
              (ctx->flags & INTEL_BATCH_DECODE_OFFSETS) ? batch_addr : 0,
              reset_color);
      return;
   }

   ctx->n_batch_buffer_start++;

   for (p = batch; p < end; p += length) {
      inst = intel_ctx_find_instruction(ctx, p);
      length = intel_group_get_length(inst, p);
      assert(inst == NULL || length > 0);
      /* Unknown commands advance one DWord at a time to resynchronize. */
      length = MAX2(1, length);

      uint64_t offset;
      if (ctx->flags & INTEL_BATCH_DECODE_OFFSETS)
         offset = batch_addr + ((char *)p - (char *)batch);
      else
         offset = 0;

      if (inst == NULL) {
         /* Dump unrecognized DWords in red and keep scanning. */
         fprintf(ctx->fp, "%s0x%08"PRIx64": unknown instruction %08x%s\n",
                 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
                 offset, p[0], reset_color);

         for (int i=1; i < length; i++) {
            fprintf(ctx->fp, "%s0x%08"PRIx64": -- %08x%s\n",
                    (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
                    offset + i * 4, p[i], reset_color);
         }

         continue;
      }

      if (ctx->flags & INTEL_BATCH_DECODE_ACCUMULATE) {
         /* Track only the latest occurrence of each command; dump the
          * accumulated state at every draw/dispatch.
          */
         struct hash_entry *entry = _mesa_hash_table_search(ctx->commands, inst);
         if (entry != NULL) {
            entry->data = (void *)p;
         } else {
            _mesa_hash_table_insert(ctx->commands, inst, (void *)p);
         }

         if (!strcmp(inst->name, "3DPRIMITIVE") ||
             !strcmp(inst->name, "3DPRIMITIVE_EXTENDED") ||
             !strcmp(inst->name, "GPGPU_WALKER") ||
             !strcmp(inst->name, "3DSTATE_WM_HZ_OP") ||
             !strcmp(inst->name, "COMPUTE_WALKER")) {
            intel_print_accumulated_instrs(ctx);
         }
      } else if (ctx->filters != NULL) {
         /* Filtered mode: print only commands whose names were requested. */
         if (_mesa_hash_table_search(ctx->filters, inst->name) != NULL)
            print_instr(ctx, inst, p, offset);
      } else {
         print_instr(ctx, inst, p, offset);
      }

      /* State handlers always run, even when printing is suppressed, so
       * later packets decode against the right base addresses etc.
       */
      for (int i = 0; i < ARRAY_SIZE(state_handlers); i++) {
         if (strcmp(inst->name, state_handlers[i].cmd_name) == 0) {
            state_handlers[i].decode(ctx, p);
            break;
         }
      }

      if (strcmp(inst->name, "MI_BATCH_BUFFER_START") == 0) {
         uint64_t next_batch_addr = 0;
         bool ppgtt = false;
         bool second_level = false;
         bool predicate = false;
         struct intel_field_iterator iter;
         intel_field_iterator_init(&iter, inst, p, 0, false);
         while (intel_field_iterator_next(&iter)) {
            if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
               next_batch_addr = iter.raw_value;
            } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
               second_level = iter.raw_value;
            } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
               ppgtt = iter.raw_value;
            } else if (strcmp(iter.name, "Predication Enable") == 0) {
               predicate = iter.raw_value;
            }
         }

         if (!predicate) {
            struct intel_batch_decode_bo next_batch = ctx_get_bo(ctx, ppgtt, next_batch_addr);

            if (next_batch.map == NULL) {
               fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
                       next_batch_addr);
            } else {
               intel_print_batch(ctx, next_batch.map, next_batch.size,
                                 next_batch.addr, false);
            }
            if (second_level) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
                * like a subroutine call. Commands that come afterwards get
                * processed once the 2nd level batch buffer returns with
                * MI_BATCH_BUFFER_END.
                */
               continue;
            } else if (!from_ring) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
                * like a goto. Nothing after it will ever get processed. In
                * order to prevent the recursion from growing, we just reset the
                * loop and continue;
                */
               break;
            }
         }
      } else if (strcmp(inst->name, "MI_BATCH_BUFFER_END") == 0) {
         break;
      }
   }

   ctx->n_batch_buffer_start--;
}
1777
/* Discard the per-command counters accumulated by intel_batch_stats(). */
void
intel_batch_stats_reset(struct intel_batch_decode_ctx *ctx)
{
   _mesa_hash_table_clear(ctx->stats, NULL);
}
1783
/* Walk a batch buffer and accumulate an execution count per instruction
 * name into ctx->stats (key: instruction name, value: count stored in the
 * entry's data pointer).  Follows MI_BATCH_BUFFER_START jumps recursively,
 * mirroring the batch-chaining control flow used by the printing decoder
 * above.
 *
 * batch/batch_size describe the mapped buffer contents; batch_addr is the
 * buffer's GPU address; from_ring indicates the batch was submitted from
 * the ring, in which case a first-level chained batch does not terminate
 * processing of the current buffer.
 */
void
intel_batch_stats(struct intel_batch_decode_ctx *ctx,
                  const uint32_t *batch, uint32_t batch_size,
                  uint64_t batch_addr, bool from_ring)
{
   const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
   int length;
   struct intel_group *inst;

   /* Guard against unbounded recursion through chained/looping batches. */
   if (ctx->n_batch_buffer_start >= 100) {
      fprintf(stderr, "Max batch buffer jumps exceeded\n");
      return;
   }

   ctx->n_batch_buffer_start++;

   for (p = batch; p < end; p += length) {
      inst = intel_ctx_find_instruction(ctx, p);
      length = intel_group_get_length(inst, p);
      assert(inst == NULL || length > 0);
      /* Unknown instructions still advance by at least one dword so the
       * loop always makes forward progress.
       */
      length = MAX2(1, length);

      const char *name =
         inst != NULL ? inst->name : "unknown";

      /* Bump the count for this name; the count lives directly in the
       * hash entry's data pointer.
       */
      struct hash_entry *entry = _mesa_hash_table_search(ctx->stats, name);
      if (entry != NULL) {
         entry->data = (void *)((uintptr_t)entry->data + 1);
      } else {
         _mesa_hash_table_insert(ctx->stats, name, (void *)(uintptr_t)1);
      }

      if (inst == NULL)
         continue;

      if (strcmp(inst->name, "MI_BATCH_BUFFER_START") == 0) {
         uint64_t next_batch_addr = 0;
         bool ppgtt = false;
         bool second_level = false;
         bool predicate = false;
         struct intel_field_iterator iter;
         intel_field_iterator_init(&iter, inst, p, 0, false);
         /* Extract the fields that determine where we jump and how the
          * jump behaves.
          */
         while (intel_field_iterator_next(&iter)) {
            if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
               next_batch_addr = iter.raw_value;
            } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
               second_level = iter.raw_value;
            } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
               ppgtt = iter.raw_value;
            } else if (strcmp(iter.name, "Predication Enable") == 0) {
               predicate = iter.raw_value;
            }
         }

         /* A predicated jump may not execute; only follow it when the
          * predicate is clear.
          */
         if (!predicate) {
            struct intel_batch_decode_bo next_batch =
               ctx_get_bo(ctx, ppgtt, next_batch_addr);

            if (next_batch.map == NULL) {
               fprintf(stderr, "Secondary batch at 0x%08"PRIx64" unavailable\n",
                       next_batch_addr);
            } else {
               intel_batch_stats(ctx, next_batch.map, next_batch.size,
                                 next_batch.addr, false);
            }
            if (second_level) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
                * like a subroutine call. Commands that come afterwards get
                * processed once the 2nd level batch buffer returns with
                * MI_BATCH_BUFFER_END.
                */
               continue;
            } else if (!from_ring) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
                * like a goto. Nothing after it will ever get processed. In
                * order to prevent the recursion from growing, we just reset the
                * loop and continue;
                */
               break;
            }
         }
      } else if (strcmp(inst->name, "MI_BATCH_BUFFER_END") == 0) {
         break;
      }
   }

   ctx->n_batch_buffer_start--;
}
1872
/* Flattened (name, count) pair used when dumping the stats hash table in
 * a stable, sorted order.
 */
struct inst_stat {
   const char *name;
   uint32_t count;
};

/* qsort() comparator: order instruction stats alphabetically by name. */
static int
compare_inst_stat(const void *v1, const void *v2)
{
   const struct inst_stat *a = v1;
   const struct inst_stat *b = v2;

   return strcmp(a->name, b->name);
}
1884
1885 void
intel_batch_print_stats(struct intel_batch_decode_ctx * ctx)1886 intel_batch_print_stats(struct intel_batch_decode_ctx *ctx)
1887 {
1888 struct util_dynarray arr;
1889 util_dynarray_init(&arr, NULL);
1890
1891 hash_table_foreach(ctx->stats, entry) {
1892 struct inst_stat inst = {
1893 .name = (const char *)entry->key,
1894 .count = (uintptr_t)entry->data,
1895 };
1896 util_dynarray_append(&arr, struct inst_stat, inst);
1897 }
1898 qsort(util_dynarray_begin(&arr),
1899 util_dynarray_num_elements(&arr, struct inst_stat),
1900 sizeof(struct inst_stat),
1901 compare_inst_stat);
1902 util_dynarray_foreach(&arr, struct inst_stat, i)
1903 fprintf(ctx->fp, "%-40s: %u\n", i->name, i->count);
1904
1905 util_dynarray_fini(&arr);
1906 }
1907