1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "intel_decoder.h"
25 #include "intel_decoder_private.h"
26 
27 #include "util/macros.h"
28 #include "util/u_debug.h"
29 #include "util/u_dynarray.h"
30 #include "util/u_math.h" /* Needed for ROUND_DOWN_TO */
31 
32 #include <string.h>
33 
34 static const struct debug_control debug_control[] = {
35    { "color",      INTEL_BATCH_DECODE_IN_COLOR },
36    { "full",       INTEL_BATCH_DECODE_FULL },
37    { "offsets",    INTEL_BATCH_DECODE_OFFSETS },
38    { "floats",     INTEL_BATCH_DECODE_FLOATS },
39    { "surfaces",   INTEL_BATCH_DECODE_SURFACES },
40    { "accumulate", INTEL_BATCH_DECODE_ACCUMULATE },
41    { "vb-data",    INTEL_BATCH_DECODE_VB_DATA },
42    { NULL,    0 }
43 };
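/* The INTEL_DECODE environment variable is parsed against the table above as
 * a comma-separated list of names, e.g. INTEL_DECODE=color,full,floats
 * (illustrative values only). */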
44 
45 void
46 intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx,
47                             const struct intel_device_info *devinfo,
48                             FILE *fp, enum intel_batch_decode_flags flags,
49                             const char *xml_path,
50                             struct intel_batch_decode_bo (*get_bo)(void *,
51                                                                    bool,
52                                                                    uint64_t),
53                             unsigned (*get_state_size)(void *, uint64_t,
54                                                        uint64_t),
55                             void *user_data)
56 {
57    memset(ctx, 0, sizeof(*ctx));
58 
59    ctx->devinfo = *devinfo;
60    ctx->get_bo = get_bo;
61    ctx->get_state_size = get_state_size;
62    ctx->user_data = user_data;
63    ctx->fp = fp;
64    ctx->flags = parse_enable_string(getenv("INTEL_DECODE"), flags, debug_control);
65    ctx->max_vbo_decoded_lines = -1; /* No limit! */
66    ctx->engine = INTEL_ENGINE_CLASS_RENDER;
67 
68    if (xml_path == NULL)
69       ctx->spec = intel_spec_load(devinfo);
70    else
71       ctx->spec = intel_spec_load_from_path(devinfo, xml_path);
72 
73    ctx->commands =
74       _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
75    ctx->stats =
76       _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
77 
78    const char *filters = getenv("INTEL_DECODE_FILTERS");
79    if (filters != NULL) {
80       ctx->filters =
81          _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
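      /* INTEL_DECODE_FILTERS is expected to hold a comma-separated list of
       * instruction names, e.g. "3DSTATE_VS,3DPRIMITIVE" (hypothetical
       * values); each entry is copied into ctx->filters below. */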
82       do {
83          const char *term = filters;
84          if (strlen(term) == 0)
85             break;
86 
87          filters = strstr(term, ",");
88 
89          char *str = ralloc_strndup(ctx->filters, term,
90                                     filters != NULL ?
91                                     (filters - term) : strlen(term));
92          _mesa_hash_table_insert(ctx->filters, str, str);
93 
94          if (filters == NULL)
95             break;
96 
97          filters++;
98       } while (true);
99    }
100 }
101 
102 void
103 intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx *ctx)
104 {
105    if (ctx->filters != NULL)
106       _mesa_hash_table_destroy(ctx->filters, NULL);
107    _mesa_hash_table_destroy(ctx->commands, NULL);
108    _mesa_hash_table_destroy(ctx->stats, NULL);
109    intel_spec_destroy(ctx->spec);
110 }
111 
112 #define CSI "\e["
113 #define RED_COLOR    CSI "31m"
114 #define BLUE_HEADER  CSI "0;44m" CSI "1;37m"
115 #define GREEN_HEADER CSI "1;42m"
116 #define NORMAL       CSI "0m"
117 
118 static void
119 ctx_print_group(struct intel_batch_decode_ctx *ctx,
120                 const struct intel_group *group,
121                 uint64_t address, const void *map)
122 {
123    intel_print_group(ctx->fp, group, address, map, 0,
124                    (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) != 0);
125 }
126 
127 struct intel_batch_decode_bo
128 ctx_get_bo(struct intel_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr)
129 {
130    if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0)) {
131       /* On Broadwell and above, we have 48-bit addresses which consume two
132        * dwords.  Some packets require that these get stored in a "canonical
133        * form" which means that bit 47 is sign-extended through the upper
134        * bits. In order to correctly handle those aub dumps, we need to mask
135        * off the top 16 bits.
136        */
137       addr &= (~0ull >> 16);
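      /* e.g. a canonical 0xffff800000001000 becomes 0x0000800000001000 here. */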
138    }
139 
140    struct intel_batch_decode_bo bo = ctx->get_bo(ctx->user_data, ppgtt, addr);
141 
142    if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0))
143       bo.addr &= (~0ull >> 16);
144 
145    /* We may actually have an offset into the bo */
146    if (bo.map != NULL) {
147       assert(bo.addr <= addr);
148       uint64_t offset = addr - bo.addr;
149       bo.map += offset;
150       bo.addr += offset;
151       bo.size -= offset;
152    }
153 
154    return bo;
155 }
156 
157 static int
158 update_count(struct intel_batch_decode_ctx *ctx,
159              uint64_t address,
160              uint64_t base_address,
161              unsigned element_dwords,
162              unsigned guess)
163 {
164    unsigned size = 0;
165 
166    if (ctx->get_state_size)
167       size = ctx->get_state_size(ctx->user_data, address, base_address);
168 
169    if (size > 0)
170       return size / (sizeof(uint32_t) * element_dwords);
171 
172    /* In the absence of any information, just guess arbitrarily. */
173    return guess;
174 }
175 
176 static inline void
177 ctx_disassemble_program(struct intel_batch_decode_ctx *ctx,
178                         uint32_t ksp,
179                         const char *short_name,
180                         const char *name)
181 {
182    ctx->disassemble_program(ctx, ksp, short_name, name);
183 }
184 
185 /* Heuristic to determine whether a uint32_t is probably actually a float
186  * (http://stackoverflow.com/a/2953466)
187  */
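/* For example, 0x3f800000 (1.0f) has a biased exponent of 127 and is treated
 * as a float by the checks below, while 0x00000005 fails all three of them. */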
188 
189 static bool
190 probably_float(uint32_t bits)
191 {
192    int exp = ((bits & 0x7f800000U) >> 23) - 127;
193    uint32_t mant = bits & 0x007fffff;
194 
195    /* +- 0.0 */
196    if (exp == -127 && mant == 0)
197       return true;
198 
199    /* +- 1 billionth to 1 billion */
200    if (-30 <= exp && exp <= 30)
201       return true;
202 
203    /* some value with only a few binary digits */
204    if ((mant & 0x0000ffff) == 0)
205       return true;
206 
207    return false;
208 }
209 
210 static void
211 ctx_print_buffer(struct intel_batch_decode_ctx *ctx,
212                  struct intel_batch_decode_bo bo,
213                  uint32_t read_length,
214                  uint32_t pitch,
215                  int max_lines)
216 {
217    const uint32_t *dw_end =
218          bo.map + ROUND_DOWN_TO(MIN2(bo.size, read_length), 4);
219 
220    int column_count = 0, pitch_col_count = 0, line_count = -1;
221    for (const uint32_t *dw = bo.map; dw < dw_end; dw++) {
222       if (pitch_col_count * 4 == pitch || column_count == 8) {
223          fprintf(ctx->fp, "\n");
224          column_count = 0;
225          if (pitch_col_count * 4 == pitch)
226             pitch_col_count = 0;
227          line_count++;
228 
229          if (max_lines >= 0 && line_count >= max_lines)
230             break;
231       }
232       fprintf(ctx->fp, column_count == 0 ? "  " : " ");
233 
234       if ((ctx->flags & INTEL_BATCH_DECODE_FLOATS) && probably_float(*dw))
235          fprintf(ctx->fp, "  %8.2f", *(float *) dw);
236       else
237          fprintf(ctx->fp, "  0x%08x", *dw);
238 
239       column_count++;
240       pitch_col_count++;
241    }
242    fprintf(ctx->fp, "\n");
243 }
244 
245 static struct intel_group *
246 intel_ctx_find_instruction(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
247 {
248    return intel_spec_find_instruction(ctx->spec, ctx->engine, p);
249 }
250 
251 static void
252 handle_state_base_address(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
253 {
254    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
255 
256    struct intel_field_iterator iter;
257    intel_field_iterator_init(&iter, inst, p, 0, false);
258 
259    uint64_t surface_base = 0, dynamic_base = 0, instruction_base = 0;
260    bool surface_modify = 0, dynamic_modify = 0, instruction_modify = 0;
261 
262    while (intel_field_iterator_next(&iter)) {
263       if (strcmp(iter.name, "Surface State Base Address") == 0) {
264          surface_base = iter.raw_value;
265       } else if (strcmp(iter.name, "Dynamic State Base Address") == 0) {
266          dynamic_base = iter.raw_value;
267       } else if (strcmp(iter.name, "Instruction Base Address") == 0) {
268          instruction_base = iter.raw_value;
269       } else if (strcmp(iter.name, "Surface State Base Address Modify Enable") == 0) {
270          surface_modify = iter.raw_value;
271       } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) {
272          dynamic_modify = iter.raw_value;
273       } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) {
274          instruction_modify = iter.raw_value;
275       }
276    }
277 
278    if (dynamic_modify)
279       ctx->dynamic_base = dynamic_base;
280 
281    if (surface_modify)
282       ctx->surface_base = surface_base;
283 
284    if (instruction_modify)
285       ctx->instruction_base = instruction_base;
286 }
287 
288 static void
289 handle_binding_table_pool_alloc(struct intel_batch_decode_ctx *ctx,
290                                 const uint32_t *p)
291 {
292    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
293 
294    struct intel_field_iterator iter;
295    intel_field_iterator_init(&iter, inst, p, 0, false);
296 
297    uint64_t bt_pool_base = 0;
298    bool bt_pool_enable = false;
299 
300    while (intel_field_iterator_next(&iter)) {
301       if (strcmp(iter.name, "Binding Table Pool Base Address") == 0) {
302          bt_pool_base = iter.raw_value;
303       } else if (strcmp(iter.name, "Binding Table Pool Enable") == 0) {
304          bt_pool_enable = iter.raw_value;
305       }
306    }
307 
308    if (bt_pool_enable || ctx->devinfo.verx10 >= 125) {
309       ctx->bt_pool_base = bt_pool_base;
310    } else {
311       ctx->bt_pool_base = 0;
312    }
313 }
314 
315 static void
316 dump_binding_table(struct intel_batch_decode_ctx *ctx,
317                    uint32_t offset, int count)
318 {
319    struct intel_group *strct =
320       intel_spec_find_struct(ctx->spec, "RENDER_SURFACE_STATE");
321    if (strct == NULL) {
322       fprintf(ctx->fp, "did not find RENDER_SURFACE_STATE info\n");
323       return;
324    }
325 
326    /* Most platforms use a 16-bit pointer with 32B alignment in bits 15:5. */
327    uint32_t btp_alignment = 32;
328    uint32_t btp_pointer_bits = 16;
329 
330    if (ctx->devinfo.verx10 >= 125) {
331       /* The pointer is now 21-bit with 32B alignment in bits 20:5. */
332       btp_pointer_bits = 21;
333    } else if (ctx->use_256B_binding_tables) {
334       /* When 256B binding tables are enabled, we have to shift the offset
335        * which is stored in bits 15:5 but interpreted as bits 18:8 of the
336        * actual offset.  The effective pointer is 19-bit with 256B alignment.
337        */
338       offset <<= 3;
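      /* e.g. a stored value of 0x20 (bit 5 set) becomes an effective offset
       * of 0x100 (256 bytes) after this shift. */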
339       btp_pointer_bits = 19;
340       btp_alignment = 256;
341    }
342 
343    const uint64_t bt_pool_base = ctx->bt_pool_base ? ctx->bt_pool_base :
344                                                      ctx->surface_base;
345 
346    if (count < 0) {
347       count = update_count(ctx, bt_pool_base + offset,
348                            bt_pool_base, 1, 32);
349    }
350 
351    if (offset % btp_alignment != 0 || offset >= (1u << btp_pointer_bits)) {
352       fprintf(ctx->fp, "  invalid binding table pointer\n");
353       return;
354    }
355 
356    struct intel_batch_decode_bo bind_bo =
357       ctx_get_bo(ctx, true, bt_pool_base + offset);
358 
359    if (bind_bo.map == NULL) {
360       fprintf(ctx->fp, "  binding table unavailable\n");
361       return;
362    }
363 
364    const uint32_t *pointers = bind_bo.map;
365    for (int i = 0; i < count; i++) {
366       if (((uintptr_t)&pointers[i] >= ((uintptr_t)bind_bo.map + bind_bo.size)))
367          break;
368 
369       uint64_t addr = ctx->surface_base + pointers[i];
370       struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
371       uint32_t size = strct->dw_length * 4;
372 
373       if (pointers[i] % 32 != 0 ||
374           addr < bo.addr || addr + size > bo.addr + bo.size) {
375          fprintf(ctx->fp, "pointer %u: 0x%08x <not valid>\n", i, pointers[i]);
376          continue;
377       }
378 
379       fprintf(ctx->fp, "pointer %u: 0x%08x\n", i, pointers[i]);
380       if (ctx->flags & INTEL_BATCH_DECODE_SURFACES)
381          ctx_print_group(ctx, strct, addr, bo.map + (addr - bo.addr));
382    }
383 }
384 
385 static void
386 dump_samplers(struct intel_batch_decode_ctx *ctx, uint32_t offset, int count)
387 {
388    struct intel_group *strct = intel_spec_find_struct(ctx->spec, "SAMPLER_STATE");
389    uint64_t state_addr = ctx->dynamic_base + offset;
390 
391    assert(count > 0);
392 
393    struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
394    const void *state_map = bo.map;
395 
396    if (state_map == NULL) {
397       fprintf(ctx->fp, "  samplers unavailable\n");
398       return;
399    }
400 
401    if (offset % 32 != 0) {
402       fprintf(ctx->fp, "  invalid sampler state pointer\n");
403       return;
404    }
405 
406    const unsigned sampler_state_size = strct->dw_length * 4;
407 
408    if (count * sampler_state_size >= bo.size) {
409       fprintf(ctx->fp, "  sampler state ends after bo ends\n");
410       assert(!"sampler state ends after bo ends");
411       return;
412    }
413 
414    for (int i = 0; i < count; i++) {
415       fprintf(ctx->fp, "sampler state %d\n", i);
416       if (ctx->flags & INTEL_BATCH_DECODE_SAMPLERS)
417          ctx_print_group(ctx, strct, state_addr, state_map);
418       state_addr += sampler_state_size;
419       state_map += sampler_state_size;
420    }
421 }
422 
423 static void
424 handle_interface_descriptor_data(struct intel_batch_decode_ctx *ctx,
425                                  struct intel_group *desc, const uint32_t *p)
426 {
427    uint64_t ksp = 0;
428    uint32_t sampler_offset = 0, sampler_count = 0;
429    uint32_t binding_table_offset = 0, binding_entry_count = 0;
430 
431    struct intel_field_iterator iter;
432    intel_field_iterator_init(&iter, desc, p, 0, false);
433    while (intel_field_iterator_next(&iter)) {
434       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
435          ksp = strtoll(iter.value, NULL, 16);
436       } else if (strcmp(iter.name, "Sampler State Pointer") == 0) {
437          sampler_offset = strtol(iter.value, NULL, 16);
438       } else if (strcmp(iter.name, "Sampler Count") == 0) {
439          sampler_count = strtol(iter.value, NULL, 10);
440       } else if (strcmp(iter.name, "Binding Table Pointer") == 0) {
441          binding_table_offset = strtol(iter.value, NULL, 16);
442       } else if (strcmp(iter.name, "Binding Table Entry Count") == 0) {
443          binding_entry_count = strtol(iter.value, NULL, 10);
444       }
445    }
446 
447    ctx_disassemble_program(ctx, ksp, "CS", "compute shader");
448    fprintf(ctx->fp, "\n");
449 
450    if (sampler_count)
451       dump_samplers(ctx, sampler_offset, sampler_count);
452    if (binding_entry_count)
453       dump_binding_table(ctx, binding_table_offset, binding_entry_count);
454 }
455 
456 static void
457 handle_media_interface_descriptor_load(struct intel_batch_decode_ctx *ctx,
458                                        const uint32_t *p)
459 {
460    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
461    struct intel_group *desc =
462       intel_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
463 
464    struct intel_field_iterator iter;
465    intel_field_iterator_init(&iter, inst, p, 0, false);
466    uint32_t descriptor_offset = 0;
467    int descriptor_count = 0;
468    while (intel_field_iterator_next(&iter)) {
469       if (strcmp(iter.name, "Interface Descriptor Data Start Address") == 0) {
470          descriptor_offset = strtol(iter.value, NULL, 16);
471       } else if (strcmp(iter.name, "Interface Descriptor Total Length") == 0) {
472          descriptor_count =
473             strtol(iter.value, NULL, 16) / (desc->dw_length * 4);
474       }
475    }
476 
477    uint64_t desc_addr = ctx->dynamic_base + descriptor_offset;
478    struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, desc_addr);
479    const void *desc_map = bo.map;
480 
481    if (desc_map == NULL) {
482       fprintf(ctx->fp, "  interface descriptors unavailable\n");
483       return;
484    }
485 
486    for (int i = 0; i < descriptor_count; i++) {
487       fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset);
488 
489       ctx_print_group(ctx, desc, desc_addr, desc_map);
490 
491       handle_interface_descriptor_data(ctx, desc, desc_map);
492 
493       desc_map += desc->dw_length;
494       desc_addr += desc->dw_length * 4;
495    }
496 }
497 
498 static void
499 handle_compute_walker(struct intel_batch_decode_ctx *ctx,
500                       const uint32_t *p)
501 {
502    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
503 
504    struct intel_field_iterator iter;
505    intel_field_iterator_init(&iter, inst, p, 0, false);
506    while (intel_field_iterator_next(&iter)) {
507       if (strcmp(iter.name, "body") == 0) {
508          intel_field_iterator_init(&iter, iter.struct_desc,
509                                    &iter.p[iter.start_bit / 32],
510                                    0, false);
511       } else if (strcmp(iter.name, "Interface Descriptor") == 0) {
512          handle_interface_descriptor_data(ctx, iter.struct_desc,
513                                           &iter.p[iter.start_bit / 32]);
514       }
515    }
516 }
517 
518 static void
519 handle_media_curbe_load(struct intel_batch_decode_ctx *ctx,
520                         const uint32_t *p)
521 {
522    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
523 
524    struct intel_field_iterator iter;
525    intel_field_iterator_init(&iter, inst, p, 0, false);
526 
527    uint32_t dynamic_state_offset = 0;
528    uint32_t dynamic_state_length = 0;
529 
530    while (intel_field_iterator_next(&iter)) {
531       if (strcmp(iter.name, "CURBE Data Start Address") == 0) {
532          dynamic_state_offset = iter.raw_value;
533       } else if (strcmp(iter.name, "CURBE Total Data Length") == 0) {
534          dynamic_state_length = iter.raw_value;
535       }
536    }
537 
538    if (dynamic_state_length > 0) {
539       struct intel_batch_decode_bo buffer =
540          ctx_get_bo(ctx, true, ctx->dynamic_base + dynamic_state_offset);
541       if (buffer.map != NULL)
542          ctx_print_buffer(ctx, buffer, dynamic_state_length, 0, -1);
543    }
544 }
545 
546 static void
547 handle_3dstate_vertex_buffers(struct intel_batch_decode_ctx *ctx,
548                               const uint32_t *p)
549 {
550    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
551    struct intel_group *vbs = intel_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE");
552 
553    struct intel_batch_decode_bo vb = {};
554    uint32_t vb_size = 0;
555    int index = -1;
556    int pitch = -1;
557    bool ready = false;
558 
559    struct intel_field_iterator iter;
560    intel_field_iterator_init(&iter, inst, p, 0, false);
561    while (intel_field_iterator_next(&iter)) {
562       if (iter.struct_desc != vbs)
563          continue;
564 
565       struct intel_field_iterator vbs_iter;
566       intel_field_iterator_init(&vbs_iter, vbs, &iter.p[iter.start_bit / 32], 0, false);
567       while (intel_field_iterator_next(&vbs_iter)) {
568          if (strcmp(vbs_iter.name, "Vertex Buffer Index") == 0) {
569             index = vbs_iter.raw_value;
570          } else if (strcmp(vbs_iter.name, "Buffer Pitch") == 0) {
571             pitch = vbs_iter.raw_value;
572          } else if (strcmp(vbs_iter.name, "Buffer Starting Address") == 0) {
573             vb = ctx_get_bo(ctx, true, vbs_iter.raw_value);
574          } else if (strcmp(vbs_iter.name, "Buffer Size") == 0) {
575             vb_size = vbs_iter.raw_value;
576             ready = true;
577          } else if (strcmp(vbs_iter.name, "End Address") == 0) {
578             if (vb.map && vbs_iter.raw_value >= vb.addr)
579                vb_size = (vbs_iter.raw_value + 1) - vb.addr;
580             else
581                vb_size = 0;
582             ready = true;
583          }
584 
585          if (!ready)
586             continue;
587 
588          fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size);
589 
590          if (vb.map == NULL) {
591             fprintf(ctx->fp, "  buffer contents unavailable\n");
592             continue;
593          }
594 
595          if (vb.map == 0 || vb_size == 0)
596             continue;
597 
598          if (ctx->flags & INTEL_BATCH_DECODE_VB_DATA)
599             ctx_print_buffer(ctx, vb, vb_size, pitch, ctx->max_vbo_decoded_lines);
600 
601          vb.map = NULL;
602          vb_size = 0;
603          index = -1;
604          pitch = -1;
605          ready = false;
606       }
607    }
608 }
609 
610 static void
611 handle_3dstate_index_buffer(struct intel_batch_decode_ctx *ctx,
612                             const uint32_t *p)
613 {
614    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
615 
616    struct intel_batch_decode_bo ib = {};
617    uint32_t ib_size = 0;
618    uint32_t format = 0;
619 
620    struct intel_field_iterator iter;
621    intel_field_iterator_init(&iter, inst, p, 0, false);
622    while (intel_field_iterator_next(&iter)) {
623       if (strcmp(iter.name, "Index Format") == 0) {
624          format = iter.raw_value;
625       } else if (strcmp(iter.name, "Buffer Starting Address") == 0) {
626          ib = ctx_get_bo(ctx, true, iter.raw_value);
627       } else if (strcmp(iter.name, "Buffer Size") == 0) {
628          ib_size = iter.raw_value;
629       }
630    }
631 
632    if (ib.map == NULL) {
633       fprintf(ctx->fp, "  buffer contents unavailable\n");
634       return;
635    }
636 
637    const void *m = ib.map;
638    const void *ib_end = ib.map + MIN2(ib.size, ib_size);
639    for (int i = 0; m < ib_end && i < 10; i++) {
640       switch (format) {
641       case 0:
642          fprintf(ctx->fp, "%3d ", *(uint8_t *)m);
643          m += 1;
644          break;
645       case 1:
646          fprintf(ctx->fp, "%3d ", *(uint16_t *)m);
647          m += 2;
648          break;
649       case 2:
650          fprintf(ctx->fp, "%3d ", *(uint32_t *)m);
651          m += 4;
652          break;
653       }
654    }
655 
656    if (m < ib_end)
657       fprintf(ctx->fp, "...");
658    fprintf(ctx->fp, "\n");
659 }
660 
661 static void
662 decode_single_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
663 {
664    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
665 
666    uint64_t ksp = 0;
667    bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gfx8+ only */
668    bool is_enabled = true;
669 
670    struct intel_field_iterator iter;
671    intel_field_iterator_init(&iter, inst, p, 0, false);
672    while (intel_field_iterator_next(&iter)) {
673       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
674          ksp = iter.raw_value;
675       } else if (strcmp(iter.name, "SIMD8 Dispatch Enable") == 0) {
676          is_simd8 = iter.raw_value;
677       } else if (strcmp(iter.name, "Dispatch Mode") == 0) {
678          is_simd8 = strcmp(iter.value, "SIMD8") == 0;
679       } else if (strcmp(iter.name, "Dispatch Enable") == 0) {
680          is_simd8 = strcmp(iter.value, "SIMD8") == 0;
681       } else if (strcmp(iter.name, "Enable") == 0) {
682          is_enabled = iter.raw_value;
683       }
684    }
685 
686    const char *type =
687       strcmp(inst->name,   "VS_STATE") == 0 ? "vertex shader" :
688       strcmp(inst->name,   "GS_STATE") == 0 ? "geometry shader" :
689       strcmp(inst->name,   "SF_STATE") == 0 ? "strips and fans shader" :
690       strcmp(inst->name, "CLIP_STATE") == 0 ? "clip shader" :
691       strcmp(inst->name, "3DSTATE_DS") == 0 ? "tessellation evaluation shader" :
692       strcmp(inst->name, "3DSTATE_HS") == 0 ? "tessellation control shader" :
693       strcmp(inst->name, "3DSTATE_VS") == 0 ? (is_simd8 ? "SIMD8 vertex shader" : "vec4 vertex shader") :
694       strcmp(inst->name, "3DSTATE_GS") == 0 ? (is_simd8 ? "SIMD8 geometry shader" : "vec4 geometry shader") :
695       NULL;
696    const char *short_name =
697       strcmp(inst->name,   "VS_STATE") == 0 ? "VS" :
698       strcmp(inst->name,   "GS_STATE") == 0 ? "GS" :
699       strcmp(inst->name,   "SF_STATE") == 0 ? "SF" :
700       strcmp(inst->name, "CLIP_STATE") == 0 ? "CL" :
701       strcmp(inst->name, "3DSTATE_DS") == 0 ? "DS" :
702       strcmp(inst->name, "3DSTATE_HS") == 0 ? "HS" :
703       strcmp(inst->name, "3DSTATE_VS") == 0 ? "VS" :
704       strcmp(inst->name, "3DSTATE_GS") == 0 ? "GS" :
705       NULL;
706 
707    if (is_enabled) {
708       ctx_disassemble_program(ctx, ksp, short_name, type);
709       fprintf(ctx->fp, "\n");
710    }
711 }
712 
713 static void
714 decode_mesh_task_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
715 {
716    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
717 
718    uint64_t ksp = 0;
719    uint64_t local_x_maximum = 0;
720    uint64_t threads = 0;
721 
722    struct intel_field_iterator iter;
723    intel_field_iterator_init(&iter, inst, p, 0, false);
724    while (intel_field_iterator_next(&iter)) {
725       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
726          ksp = iter.raw_value;
727       } else if (strcmp(iter.name, "Local X Maximum") == 0) {
728          local_x_maximum = iter.raw_value;
729       } else if (strcmp(iter.name, "Number of Threads in GPGPU Thread Group") == 0) {
730          threads = iter.raw_value;
731       }
732    }
733 
734    const char *type =
735       strcmp(inst->name,   "3DSTATE_MESH_SHADER") == 0 ? "mesh shader" :
736       strcmp(inst->name,   "3DSTATE_TASK_SHADER") == 0 ? "task shader" :
737       NULL;
738    const char *short_name =
739       strcmp(inst->name,   "3DSTATE_MESH_SHADER") == 0 ? "MS" :
740       strcmp(inst->name,   "3DSTATE_TASK_SHADER") == 0 ? "TS" :
741       NULL;
742 
743    if (threads && local_x_maximum) {
744       ctx_disassemble_program(ctx, ksp, short_name, type);
745       fprintf(ctx->fp, "\n");
746    }
747 }
748 
749 static void
750 decode_ps_kern(struct intel_batch_decode_ctx *ctx,
751                struct intel_group *inst, const uint32_t *p)
752 {
753    bool single_ksp = ctx->devinfo.ver == 4;
754    uint64_t ksp[3] = {0, 0, 0};
755    bool enabled[3] = {false, false, false};
756 
757    struct intel_field_iterator iter;
758    intel_field_iterator_init(&iter, inst, p, 0, false);
759    while (intel_field_iterator_next(&iter)) {
760       if (strncmp(iter.name, "Kernel Start Pointer ",
761                   strlen("Kernel Start Pointer ")) == 0) {
762          int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
763          ksp[idx] = strtol(iter.value, NULL, 16);
764       } else if (strcmp(iter.name, "8 Pixel Dispatch Enable") == 0) {
765          enabled[0] = strcmp(iter.value, "true") == 0;
766       } else if (strcmp(iter.name, "16 Pixel Dispatch Enable") == 0) {
767          enabled[1] = strcmp(iter.value, "true") == 0;
768       } else if (strcmp(iter.name, "32 Pixel Dispatch Enable") == 0) {
769          enabled[2] = strcmp(iter.value, "true") == 0;
770       }
771    }
772 
773    if (single_ksp)
774       ksp[1] = ksp[2] = ksp[0];
775 
776    /* Reorder KSPs to be [8, 16, 32] instead of the hardware order. */
777    if (enabled[0] + enabled[1] + enabled[2] == 1) {
778       if (enabled[1]) {
779          ksp[1] = ksp[0];
780          ksp[0] = 0;
781       } else if (enabled[2]) {
782          ksp[2] = ksp[0];
783          ksp[0] = 0;
784       }
785    } else {
786       uint64_t tmp = ksp[1];
787       ksp[1] = ksp[2];
788       ksp[2] = tmp;
789    }
790 
791    if (enabled[0])
792       ctx_disassemble_program(ctx, ksp[0], "FS8", "SIMD8 fragment shader");
793    if (enabled[1])
794       ctx_disassemble_program(ctx, ksp[1], "FS16", "SIMD16 fragment shader");
795    if (enabled[2])
796       ctx_disassemble_program(ctx, ksp[2], "FS32", "SIMD32 fragment shader");
797 
798    if (enabled[0] || enabled[1] || enabled[2])
799       fprintf(ctx->fp, "\n");
800 }
801 
802 static void
803 decode_ps_kern_xe2(struct intel_batch_decode_ctx *ctx,
804                      struct intel_group *inst, const uint32_t *p)
805 {
806    uint64_t ksp[2] = {0, 0};
807    bool enabled[2] = {false, false};
808    int width[2] = {0, 0};
809 
810    struct intel_field_iterator iter;
811    intel_field_iterator_init(&iter, inst, p, 0, false);
812    while (intel_field_iterator_next(&iter)) {
813       if (strncmp(iter.name, "Kernel Start Pointer ",
814                   strlen("Kernel Start Pointer ")) == 0) {
815          int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
816          ksp[idx] = strtol(iter.value, NULL, 16);
817       } else if (strcmp(iter.name, "Kernel 0 Enable") == 0) {
818          enabled[0] = strcmp(iter.value, "true") == 0;
819       } else if (strcmp(iter.name, "Kernel 1 Enable") == 0) {
820          enabled[1] = strcmp(iter.value, "true") == 0;
821       } else if (strcmp(iter.name, "Kernel[0] : SIMD Width") == 0) {
822          width[0] = strncmp(iter.value, "0 ", 2) == 0 ? 16 : 32;
823       } else if (strcmp(iter.name, "Kernel[1] : SIMD Width") == 0) {
824          width[1] = strncmp(iter.value, "0 ", 2) == 0 ? 16 : 32;
825       }
826    }
827 
828    for (int i = 0; i < 2; i++) {
829       if (enabled[i])
830          ctx_disassemble_program(ctx, ksp[i], "FS",
831                                  width[i] == 16 ?
832                                  "SIMD16 fragment shader" :
833                                  "SIMD32 fragment shader");
834    }
835 
836    if (enabled[0] || enabled[1])
837       fprintf(ctx->fp, "\n");
838 }
839 
840 static void
841 decode_ps_kernels(struct intel_batch_decode_ctx *ctx,
842                   const uint32_t *p)
843 {
844    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
845    if (ctx->devinfo.ver >= 20)
846       decode_ps_kern_xe2(ctx, inst, p);
847    else
848       decode_ps_kern(ctx, inst, p);
849 }
850 
851 static void
852 decode_3dstate_constant_all(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
853 {
854    struct intel_group *inst =
855       intel_spec_find_instruction(ctx->spec, ctx->engine, p);
856    struct intel_group *body =
857       intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_ALL_DATA");
858 
859    uint32_t read_length[4] = {0};
860    struct intel_batch_decode_bo buffer[4];
861    memset(buffer, 0, sizeof(buffer));
862 
863    struct intel_field_iterator outer;
864    intel_field_iterator_init(&outer, inst, p, 0, false);
865    int idx = 0;
866    while (intel_field_iterator_next(&outer)) {
867       if (outer.struct_desc != body)
868          continue;
869 
870       struct intel_field_iterator iter;
871       intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
872                               0, false);
873       while (intel_field_iterator_next(&iter)) {
874          if (!strcmp(iter.name, "Pointer To Constant Buffer")) {
875             buffer[idx] = ctx_get_bo(ctx, true, iter.raw_value);
876          } else if (!strcmp(iter.name, "Constant Buffer Read Length")) {
877             read_length[idx] = iter.raw_value;
878          }
879       }
880       idx++;
881    }
882 
883    for (int i = 0; i < 4; i++) {
884       if (read_length[i] == 0 || buffer[i].map == NULL)
885          continue;
886 
887       unsigned size = read_length[i] * 32;
888       fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
889 
890       ctx_print_buffer(ctx, buffer[i], size, 0, -1);
891    }
892 }
893 
894 static void
895 decode_3dstate_constant(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
896 {
897    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
898    struct intel_group *body =
899       intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");
900 
901    uint32_t read_length[4] = {0};
902    uint64_t read_addr[4] = {0};
903 
904    struct intel_field_iterator outer;
905    intel_field_iterator_init(&outer, inst, p, 0, false);
906    while (intel_field_iterator_next(&outer)) {
907       if (outer.struct_desc != body)
908          continue;
909 
910       struct intel_field_iterator iter;
911       intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
912                               0, false);
913 
914       while (intel_field_iterator_next(&iter)) {
915          int idx;
916          if (sscanf(iter.name, "Read Length[%d]", &idx) == 1) {
917             read_length[idx] = iter.raw_value;
918          } else if (sscanf(iter.name, "Buffer[%d]", &idx) == 1) {
919             read_addr[idx] = iter.raw_value;
920          }
921       }
922 
923       for (int i = 0; i < 4; i++) {
924          if (read_length[i] == 0)
925             continue;
926 
927          struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr[i]);
928          if (!buffer.map) {
929             fprintf(ctx->fp, "constant buffer %d unavailable\n", i);
930             continue;
931          }
932 
933          unsigned size = read_length[i] * 32;
934          fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
935 
936          ctx_print_buffer(ctx, buffer, size, 0, -1);
937       }
938    }
939 }
940 
941 static void
942 decode_gfx4_constant_buffer(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
943 {
944    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
945    uint64_t read_length = 0, read_addr = 0, valid = 0;
946    struct intel_field_iterator iter;
947    intel_field_iterator_init(&iter, inst, p, 0, false);
948 
949    while (intel_field_iterator_next(&iter)) {
950       if (!strcmp(iter.name, "Buffer Length")) {
951          read_length = iter.raw_value;
952       } else if (!strcmp(iter.name, "Valid")) {
953          valid = iter.raw_value;
954       } else if (!strcmp(iter.name, "Buffer Starting Address")) {
955          read_addr = iter.raw_value;
956       }
957    }
958 
959    if (!valid)
960       return;
961 
962    struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr);
963    if (!buffer.map) {
964       fprintf(ctx->fp, "constant buffer unavailable\n");
965       return;
966    }
967    unsigned size = (read_length + 1) * 16 * sizeof(float);
968    fprintf(ctx->fp, "constant buffer size %u\n", size);
969 
970    ctx_print_buffer(ctx, buffer, size, 0, -1);
971 }
972 
973 
974 static void
975 decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
976                                            const uint32_t *p)
977 {
978    fprintf(ctx->fp, "VS Binding Table:\n");
979    dump_binding_table(ctx, p[1], -1);
980 
981    fprintf(ctx->fp, "GS Binding Table:\n");
982    dump_binding_table(ctx, p[2], -1);
983 
984    if (ctx->devinfo.ver < 6) {
985       fprintf(ctx->fp, "CLIP Binding Table:\n");
986       dump_binding_table(ctx, p[3], -1);
987       fprintf(ctx->fp, "SF Binding Table:\n");
988       dump_binding_table(ctx, p[4], -1);
989       fprintf(ctx->fp, "PS Binding Table:\n");
990       dump_binding_table(ctx, p[5], -1);
991    } else {
992       fprintf(ctx->fp, "PS Binding Table:\n");
993       dump_binding_table(ctx, p[3], -1);
994    }
995 }
996 
997 static void
998 decode_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
999                                       const uint32_t *p)
1000 {
1001    dump_binding_table(ctx, p[1], -1);
1002 }
1003 
1004 static void
1005 decode_3dstate_sampler_state_pointers(struct intel_batch_decode_ctx *ctx,
1006                                       const uint32_t *p)
1007 {
1008    dump_samplers(ctx, p[1], 1);
1009 }
1010 
1011 static void
1012 decode_3dstate_sampler_state_pointers_gfx6(struct intel_batch_decode_ctx *ctx,
1013                                            const uint32_t *p)
1014 {
1015    dump_samplers(ctx, p[1], 1);
1016    dump_samplers(ctx, p[2], 1);
1017    dump_samplers(ctx, p[3], 1);
1018 }
1019 
1020 static bool
1021 str_ends_with(const char *str, const char *end)
1022 {
1023    int offset = strlen(str) - strlen(end);
1024    if (offset < 0)
1025       return false;
1026 
1027    return strcmp(str + offset, end) == 0;
1028 }
1029 
1030 static void
1031 decode_dynamic_state(struct intel_batch_decode_ctx *ctx,
1032                        const char *struct_type, uint32_t state_offset,
1033                        int count)
1034 {
1035    uint64_t state_addr = ctx->dynamic_base + state_offset;
1036    struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
1037    const void *state_map = bo.map;
1038 
1039    if (state_map == NULL) {
1040       fprintf(ctx->fp, "  dynamic %s state unavailable\n", struct_type);
1041       return;
1042    }
1043 
1044    struct intel_group *state = intel_spec_find_struct(ctx->spec, struct_type);
1045    if (strcmp(struct_type, "BLEND_STATE") == 0) {
1046       /* Blend states are different from the others because they have a header
1047        * struct called BLEND_STATE which is followed by a variable number of
1048        * BLEND_STATE_ENTRY structs.
1049        */
1050       fprintf(ctx->fp, "%s\n", struct_type);
1051       ctx_print_group(ctx, state, state_addr, state_map);
1052 
1053       state_addr += state->dw_length * 4;
1054       state_map += state->dw_length * 4;
1055 
1056       struct_type = "BLEND_STATE_ENTRY";
1057       state = intel_spec_find_struct(ctx->spec, struct_type);
1058    }
1059 
1060    count = update_count(ctx, ctx->dynamic_base + state_offset,
1061                         ctx->dynamic_base, state->dw_length, count);
1062 
1063    for (int i = 0; i < count; i++) {
1064       fprintf(ctx->fp, "%s %d\n", struct_type, i);
1065       ctx_print_group(ctx, state, state_addr, state_map);
1066 
1067       state_addr += state->dw_length * 4;
1068       state_map += state->dw_length * 4;
1069    }
1070 }
1071 
1072 static void
1073 decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx,
1074                               const char *struct_type, const uint32_t *p,
1075                               int count)
1076 {
1077    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1078 
1079    uint32_t state_offset = 0;
1080 
1081    struct intel_field_iterator iter;
1082    intel_field_iterator_init(&iter, inst, p, 0, false);
1083    while (intel_field_iterator_next(&iter)) {
1084       if (str_ends_with(iter.name, "Pointer") || !strncmp(iter.name, "Pointer", 7)) {
1085          state_offset = iter.raw_value;
1086          break;
1087       }
1088    }
1089    decode_dynamic_state(ctx, struct_type, state_offset, count);
1090 }
1091 
1092 static void
1093 decode_3dstate_viewport_state_pointers(struct intel_batch_decode_ctx *ctx,
1094                                        const uint32_t *p)
1095 {
1096    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1097    uint32_t state_offset = 0;
1098    bool clip = false, sf = false, cc = false;
1099    struct intel_field_iterator iter;
1100    intel_field_iterator_init(&iter, inst, p, 0, false);
1101    while (intel_field_iterator_next(&iter)) {
1102       if (!strcmp(iter.name, "CLIP Viewport State Change"))
1103          clip = iter.raw_value;
1104       if (!strcmp(iter.name, "SF Viewport State Change"))
1105          sf = iter.raw_value;
1106       if (!strcmp(iter.name, "CC Viewport State Change"))
1107          cc = iter.raw_value;
1108       else if (!strcmp(iter.name, "Pointer to CLIP_VIEWPORT") && clip) {
1109          state_offset = iter.raw_value;
1110          decode_dynamic_state(ctx, "CLIP_VIEWPORT", state_offset, 1);
1111       }
1112       else if (!strcmp(iter.name, "Pointer to SF_VIEWPORT") && sf) {
1113          state_offset = iter.raw_value;
1114          decode_dynamic_state(ctx, "SF_VIEWPORT", state_offset, 1);
1115       }
1116       else if (!strcmp(iter.name, "Pointer to CC_VIEWPORT") && cc) {
1117          state_offset = iter.raw_value;
1118          decode_dynamic_state(ctx, "CC_VIEWPORT", state_offset, 1);
1119       }
1120    }
1121 }
1122 
1123 static void
1124 decode_3dstate_viewport_state_pointers_cc(struct intel_batch_decode_ctx *ctx,
1125                                           const uint32_t *p)
1126 {
1127    decode_dynamic_state_pointers(ctx, "CC_VIEWPORT", p, 4);
1128 }
1129 
1130 static void
1131 decode_3dstate_viewport_state_pointers_sf_clip(struct intel_batch_decode_ctx *ctx,
1132                                                const uint32_t *p)
1133 {
1134    decode_dynamic_state_pointers(ctx, "SF_CLIP_VIEWPORT", p, 4);
1135 }
1136 
1137 static void
1138 decode_3dstate_blend_state_pointers(struct intel_batch_decode_ctx *ctx,
1139                                     const uint32_t *p)
1140 {
1141    decode_dynamic_state_pointers(ctx, "BLEND_STATE", p, 8);
1142 }
1143 
1144 static void
1145 decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx,
1146                                  const uint32_t *p)
1147 {
1148    if (ctx->devinfo.ver != 6) {
1149       decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1);
1150       return;
1151    }
1152 
1153    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1154 
1155    uint32_t state_offset = 0;
1156    bool blend_change = false, ds_change = false, cc_change = false;
1157    struct intel_field_iterator iter;
1158    intel_field_iterator_init(&iter, inst, p, 0, false);
1159    while (intel_field_iterator_next(&iter)) {
1160       if (!strcmp(iter.name, "BLEND_STATE Change"))
1161          blend_change = iter.raw_value;
1162       else if (!strcmp(iter.name, "DEPTH_STENCIL_STATE Change"))
1163          ds_change = iter.raw_value;
1164       else if (!strcmp(iter.name, "Color Calc State Pointer Valid"))
1165          cc_change = iter.raw_value;
1166       else if (!strcmp(iter.name, "Pointer to DEPTH_STENCIL_STATE") && ds_change) {
1167          state_offset = iter.raw_value;
1168          decode_dynamic_state(ctx, "DEPTH_STENCIL_STATE", state_offset, 1);
1169       }
1170       else if (!strcmp(iter.name, "Pointer to BLEND_STATE") && blend_change) {
1171          state_offset = iter.raw_value;
1172          decode_dynamic_state(ctx, "BLEND_STATE", state_offset, 1);
1173       }
1174       else if (!strcmp(iter.name, "Color Calc State Pointer") && cc_change) {
1175          state_offset = iter.raw_value;
1176          decode_dynamic_state(ctx, "COLOR_CALC_STATE", state_offset, 1);
1177       }
1178    }
1179 }
1180 
1181 static void
1182 decode_3dstate_ds_state_pointers(struct intel_batch_decode_ctx *ctx,
1183                                  const uint32_t *p)
1184 {
1185    decode_dynamic_state_pointers(ctx, "DEPTH_STENCIL_STATE", p, 1);
1186 }
1187 
1188 static void
1189 decode_3dstate_scissor_state_pointers(struct intel_batch_decode_ctx *ctx,
1190                                       const uint32_t *p)
1191 {
1192    decode_dynamic_state_pointers(ctx, "SCISSOR_RECT", p, 1);
1193 }
1194 
1195 static void
1196 decode_3dstate_slice_table_state_pointers(struct intel_batch_decode_ctx *ctx,
1197                                           const uint32_t *p)
1198 {
1199    decode_dynamic_state_pointers(ctx, "SLICE_HASH_TABLE", p, 1);
1200 }
1201 
1202 static void
1203 handle_gt_mode(struct intel_batch_decode_ctx *ctx,
1204                uint32_t reg_addr, uint32_t val)
1205 {
1206    struct intel_group *reg = intel_spec_find_register(ctx->spec, reg_addr);
1207 
1208    assert(intel_group_get_length(reg, &val) == 1);
1209 
1210    struct intel_field_iterator iter;
1211    intel_field_iterator_init(&iter, reg, &val, 0, false);
1212 
1213    uint32_t bt_alignment;
1214    bool bt_alignment_mask = 0;
1215 
1216    while (intel_field_iterator_next(&iter)) {
1217       if (strcmp(iter.name, "Binding Table Alignment") == 0) {
1218          bt_alignment = iter.raw_value;
1219       } else if (strcmp(iter.name, "Binding Table Alignment Mask") == 0) {
1220          bt_alignment_mask = iter.raw_value;
1221       }
1222    }
1223 
1224    if (bt_alignment_mask)
1225       ctx->use_256B_binding_tables = bt_alignment;
1226 }
1227 
1228 struct reg_handler {
1229    const char *name;
1230    void (*handler)(struct intel_batch_decode_ctx *ctx,
1231                    uint32_t reg_addr, uint32_t val);
1232 } reg_handlers[] = {
1233    { "GT_MODE", handle_gt_mode }
1234 };
1235 
1236 static void
1237 decode_load_register_imm(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1238 {
1239    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1240    const unsigned length = intel_group_get_length(inst, p);
1241    assert(length & 1);
1242    const unsigned nr_regs = (length - 1) / 2;
1243 
1244    for (unsigned i = 0; i < nr_regs; i++) {
1245       struct intel_group *reg = intel_spec_find_register(ctx->spec, p[i * 2 + 1]);
1246       if (reg != NULL) {
1247          fprintf(ctx->fp, "register %s (0x%x): 0x%x\n",
1248                  reg->name, reg->register_offset, p[2]);
1249          ctx_print_group(ctx, reg, reg->register_offset, &p[2]);
1250 
1251          for (unsigned i = 0; i < ARRAY_SIZE(reg_handlers); i++) {
1252             if (strcmp(reg->name, reg_handlers[i].name) == 0)
1253                reg_handlers[i].handler(ctx, p[1], p[2]);
1254          }
1255       }
1256    }
1257 }
1258 
1259 static void
1260 disasm_program_from_group(struct intel_batch_decode_ctx *ctx,
1261                           struct intel_group *strct, const void *map,
1262                           const char *short_name, const char *type)
1263 {
1264    uint64_t ksp = 0;
1265    bool is_enabled = true;
1266    struct intel_field_iterator iter;
1267 
1268    intel_field_iterator_init(&iter, strct, map, 0, false);
1269 
1270    while (intel_field_iterator_next(&iter)) {
1271       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
1272          ksp = iter.raw_value;
1273       } else if (strcmp(iter.name, "Enable") == 0) {
1274          is_enabled = iter.raw_value;
1275       }
1276    }
1277 
1278    if (is_enabled) {
1279       ctx_disassemble_program(ctx, ksp, short_name, type);
1280       fprintf(ctx->fp, "\n");
1281    }
1282 }
1283 
1284 static void
1285 decode_vs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1286 {
1287    struct intel_group *strct =
1288       intel_spec_find_struct(ctx->spec, "VS_STATE");
1289    if (strct == NULL) {
1290       fprintf(ctx->fp, "did not find VS_STATE info\n");
1291       return;
1292    }
1293 
1294    struct intel_batch_decode_bo bind_bo =
1295       ctx_get_bo(ctx, true, offset);
1296 
1297    if (bind_bo.map == NULL) {
1298       fprintf(ctx->fp, " vs state unavailable\n");
1299       return;
1300    }
1301 
1302    ctx_print_group(ctx, strct, offset, bind_bo.map);
1303    disasm_program_from_group(ctx, strct, bind_bo.map, "VS", "vertex shader");
1304 }
1305 
1306 static void
1307 decode_gs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1308 {
1309    struct intel_group *strct =
1310       intel_spec_find_struct(ctx->spec, "GS_STATE");
1311    if (strct == NULL) {
1312       fprintf(ctx->fp, "did not find GS_STATE info\n");
1313       return;
1314    }
1315 
1316    struct intel_batch_decode_bo bind_bo =
1317       ctx_get_bo(ctx, true, offset);
1318 
1319    if (bind_bo.map == NULL) {
1320       fprintf(ctx->fp, " gs state unavailable\n");
1321       return;
1322    }
1323 
1324    ctx_print_group(ctx, strct, offset, bind_bo.map);
1325    disasm_program_from_group(ctx, strct, bind_bo.map, "GS", "geometry shader");
1326 }
1327 
1328 static void
1329 decode_clip_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1330 {
1331    struct intel_group *strct =
1332       intel_spec_find_struct(ctx->spec, "CLIP_STATE");
1333    if (strct == NULL) {
1334       fprintf(ctx->fp, "did not find CLIP_STATE info\n");
1335       return;
1336    }
1337 
1338    struct intel_batch_decode_bo bind_bo =
1339       ctx_get_bo(ctx, true, offset);
1340 
1341    if (bind_bo.map == NULL) {
1342       fprintf(ctx->fp, " clip state unavailable\n");
1343       return;
1344    }
1345 
1346    ctx_print_group(ctx, strct, offset, bind_bo.map);
1347    disasm_program_from_group(ctx, strct, bind_bo.map, "CL", "clip shader");
1348 
1349    struct intel_group *vp_strct =
1350       intel_spec_find_struct(ctx->spec, "CLIP_VIEWPORT");
1351    if (vp_strct == NULL) {
1352       fprintf(ctx->fp, "did not find CLIP_VIEWPORT info\n");
1353       return;
1354    }
1355    uint32_t clip_vp_offset = ((uint32_t *)bind_bo.map)[6] & ~0x3;
1356    struct intel_batch_decode_bo vp_bo =
1357       ctx_get_bo(ctx, true, clip_vp_offset);
1358    if (vp_bo.map == NULL) {
1359       fprintf(ctx->fp, " clip vp state unavailable\n");
1360       return;
1361    }
1362    ctx_print_group(ctx, vp_strct, clip_vp_offset, vp_bo.map);
1363 }
1364 
1365 static void
1366 decode_sf_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1367 {
1368    struct intel_group *strct =
1369       intel_spec_find_struct(ctx->spec, "SF_STATE");
1370    if (strct == NULL) {
1371       fprintf(ctx->fp, "did not find SF_STATE info\n");
1372       return;
1373    }
1374 
1375    struct intel_batch_decode_bo bind_bo =
1376       ctx_get_bo(ctx, true, offset);
1377 
1378    if (bind_bo.map == NULL) {
1379       fprintf(ctx->fp, " sf state unavailable\n");
1380       return;
1381    }
1382 
1383    ctx_print_group(ctx, strct, offset, bind_bo.map);
1384    disasm_program_from_group(ctx, strct, bind_bo.map, "SF", "strips and fans shader");
1385 
1386    struct intel_group *vp_strct =
1387       intel_spec_find_struct(ctx->spec, "SF_VIEWPORT");
1388    if (vp_strct == NULL) {
1389       fprintf(ctx->fp, "did not find SF_VIEWPORT info\n");
1390       return;
1391    }
1392 
1393    uint32_t sf_vp_offset = ((uint32_t *)bind_bo.map)[5] & ~0x3;
1394    struct intel_batch_decode_bo vp_bo =
1395       ctx_get_bo(ctx, true, sf_vp_offset);
1396    if (vp_bo.map == NULL) {
1397       fprintf(ctx->fp, " sf vp state unavailable\n");
1398       return;
1399    }
1400    ctx_print_group(ctx, vp_strct, sf_vp_offset, vp_bo.map);
1401 }
1402 
1403 static void
1404 decode_wm_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1405 {
1406    struct intel_group *strct =
1407       intel_spec_find_struct(ctx->spec, "WM_STATE");
1408    if (strct == NULL) {
1409       fprintf(ctx->fp, "did not find WM_STATE info\n");
1410       return;
1411    }
1412 
1413    struct intel_batch_decode_bo bind_bo =
1414       ctx_get_bo(ctx, true, offset);
1415 
1416    if (bind_bo.map == NULL) {
1417       fprintf(ctx->fp, " wm state unavailable\n");
1418       return;
1419    }
1420 
1421    ctx_print_group(ctx, strct, offset, bind_bo.map);
1422 
1423    decode_ps_kern(ctx, strct, bind_bo.map);
1424 }
1425 
1426 static void
1427 decode_cc_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1428 {
1429    struct intel_group *strct =
1430       intel_spec_find_struct(ctx->spec, "COLOR_CALC_STATE");
1431    if (strct == NULL) {
1432       fprintf(ctx->fp, "did not find COLOR_CALC_STATE info\n");
1433       return;
1434    }
1435 
1436    struct intel_batch_decode_bo bind_bo =
1437       ctx_get_bo(ctx, true, offset);
1438 
1439    if (bind_bo.map == NULL) {
1440       fprintf(ctx->fp, " cc state unavailable\n");
1441       return;
1442    }
1443 
1444    ctx_print_group(ctx, strct, offset, bind_bo.map);
1445 
1446    struct intel_group *vp_strct =
1447       intel_spec_find_struct(ctx->spec, "CC_VIEWPORT");
1448    if (vp_strct == NULL) {
1449       fprintf(ctx->fp, "did not find CC_VIEWPORT info\n");
1450       return;
1451    }
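   /* The CC_VIEWPORT pointer lives in DWord 4 of COLOR_CALC_STATE. */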
1452    uint32_t cc_vp_offset = ((uint32_t *)bind_bo.map)[4] & ~0x3;
1453    struct intel_batch_decode_bo vp_bo =
1454       ctx_get_bo(ctx, true, cc_vp_offset);
1455    if (vp_bo.map == NULL) {
1456       fprintf(ctx->fp, " cc vp state unavailable\n");
1457       return;
1458    }
1459    ctx_print_group(ctx, vp_strct, cc_vp_offset, vp_bo.map);
1460 }
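/* 3DSTATE_PIPELINED_POINTERS carries one indirect state pointer per
 * fixed-function unit: DWord 1 = VS, DWord 2 = GS, DWord 3 = CLIP,
 * DWord 4 = SF, DWord 5 = WM and DWord 6 = CC.  Bit 0 of the GS pointer is
 * an enable flag, and bit 0 of the CLIP pointer is stripped before use.
 */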
1461 static void
1462 decode_pipelined_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1463 {
1464    fprintf(ctx->fp, "VS State Table:\n");
1465    decode_vs_state(ctx, p[1]);
1466    if (p[2] & 1) {
1467       fprintf(ctx->fp, "GS State Table:\n");
1468       decode_gs_state(ctx, p[2] & ~1);
1469    }
1470    fprintf(ctx->fp, "Clip State Table:\n");
1471    decode_clip_state(ctx, p[3] & ~1);
1472    fprintf(ctx->fp, "SF State Table:\n");
1473    decode_sf_state(ctx, p[4]);
1474    fprintf(ctx->fp, "WM State Table:\n");
1475    decode_wm_state(ctx, p[5]);
1476    fprintf(ctx->fp, "CC State Table:\n");
1477    decode_cc_state(ctx, p[6]);
1478 }
1479 
1480 static void
1481 decode_cps_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1482 {
1483    decode_dynamic_state_pointers(ctx, "CPS_STATE", p, 1);
1484 }
1485 
1486 struct custom_decoder {
1487    const char *cmd_name;
1488    void (*decode)(struct intel_batch_decode_ctx *ctx, const uint32_t *p);
1489 };
1490 
1491 /* Special handling to be able to decode other instructions */
1492 struct custom_decoder state_handlers[] = {
1493    { "STATE_BASE_ADDRESS", handle_state_base_address },
1494    { "3DSTATE_BINDING_TABLE_POOL_ALLOC", handle_binding_table_pool_alloc },
1495    { "MEDIA_INTERFACE_DESCRIPTOR_LOAD", handle_media_interface_descriptor_load },
1496 };
1497 
1498 /* Special printing of instructions */
1499 struct custom_decoder custom_decoders[] = {
1500    { "COMPUTE_WALKER", handle_compute_walker },
1501    { "MEDIA_CURBE_LOAD", handle_media_curbe_load },
1502    { "3DSTATE_VERTEX_BUFFERS", handle_3dstate_vertex_buffers },
1503    { "3DSTATE_INDEX_BUFFER", handle_3dstate_index_buffer },
1504    { "3DSTATE_VS", decode_single_ksp },
1505    { "3DSTATE_GS", decode_single_ksp },
1506    { "3DSTATE_DS", decode_single_ksp },
1507    { "3DSTATE_HS", decode_single_ksp },
1508    { "3DSTATE_PS", decode_ps_kernels },
1509    { "3DSTATE_WM", decode_ps_kernels },
1510    { "3DSTATE_MESH_SHADER", decode_mesh_task_ksp },
1511    { "3DSTATE_TASK_SHADER", decode_mesh_task_ksp },
1512    { "3DSTATE_CONSTANT_VS", decode_3dstate_constant },
1513    { "3DSTATE_CONSTANT_GS", decode_3dstate_constant },
1514    { "3DSTATE_CONSTANT_PS", decode_3dstate_constant },
1515    { "3DSTATE_CONSTANT_HS", decode_3dstate_constant },
1516    { "3DSTATE_CONSTANT_DS", decode_3dstate_constant },
1517    { "3DSTATE_CONSTANT_ALL", decode_3dstate_constant_all },
1518 
1519    { "3DSTATE_BINDING_TABLE_POINTERS", decode_gfx4_3dstate_binding_table_pointers },
1520    { "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers },
1521    { "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers },
1522    { "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers },
1523    { "3DSTATE_BINDING_TABLE_POINTERS_GS", decode_3dstate_binding_table_pointers },
1524    { "3DSTATE_BINDING_TABLE_POINTERS_PS", decode_3dstate_binding_table_pointers },
1525 
1526    { "3DSTATE_SAMPLER_STATE_POINTERS_VS", decode_3dstate_sampler_state_pointers },
1527    { "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers },
1528    { "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers },
1529    { "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers },
1530    { "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers },
1531    { "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gfx6 },
1532 
1533    { "3DSTATE_VIEWPORT_STATE_POINTERS", decode_3dstate_viewport_state_pointers },
1534    { "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc },
1535    { "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
1536    { "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers },
1537    { "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers },
1538    { "3DSTATE_DEPTH_STENCIL_STATE_POINTERS", decode_3dstate_ds_state_pointers },
1539    { "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers },
1540    { "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers },
1541    { "MI_LOAD_REGISTER_IMM", decode_load_register_imm },
1542    { "3DSTATE_PIPELINED_POINTERS", decode_pipelined_pointers },
1543    { "3DSTATE_CPS_POINTERS", decode_cps_pointers },
1544    { "CONSTANT_BUFFER", decode_gfx4_constant_buffer },
1545 };
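
/* Both tables are searched by exact command name: state_handlers run for
 * every occurrence of a matching packet (they update decoder-wide state
 * such as base addresses), while custom_decoders only add extra output when
 * INTEL_BATCH_DECODE_FULL is set.  A new packet would be hooked up the same
 * way with a handler of the same signature, e.g. (hypothetical name):
 *
 *    { "3DSTATE_MY_NEW_PACKET", handle_my_new_packet },
 */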
1546 
1547 static void
1548 get_inst_color(const struct intel_batch_decode_ctx *ctx,
1549                const struct intel_group *inst,
1550                char **const out_color,
1551                char **const out_reset_color)
1552 {
1553    const char *inst_name = intel_group_get_name(inst);
1554    if (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) {
1555       *out_reset_color = NORMAL;
1556       if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
1557          if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 ||
1558              strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0)
1559             *out_color = GREEN_HEADER;
1560          else
1561             *out_color = BLUE_HEADER;
1562       } else {
1563          *out_color = NORMAL;
1564       }
1565    } else {
1566       *out_color = "";
1567       *out_reset_color = "";
1568    }
1569 }
1570 
1571 struct inst_ptr {
1572    struct intel_group *inst;
1573    uint32_t           *ptr;
1574 };
1575 
1576 static int
1577 compare_inst_ptr(const void *v1, const void *v2)
1578 {
1579    const struct inst_ptr *i1 = v1, *i2 = v2;
1580    return strcmp(i1->inst->name, i2->inst->name);
1581 }
1582 
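/* With INTEL_BATCH_DECODE_ACCUMULATE, intel_print_batch() keeps only the
 * most recent instance of each command in ctx->commands; this helper dumps
 * that snapshot, sorted by command name, whenever a draw or dispatch packet
 * is encountered.
 */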
1583 static void
1584 intel_print_accumulated_instrs(struct intel_batch_decode_ctx *ctx)
1585 {
1586    struct util_dynarray arr;
1587    util_dynarray_init(&arr, NULL);
1588 
1589    hash_table_foreach(ctx->commands, entry) {
1590       struct inst_ptr inst = {
1591          .inst = (struct intel_group *)entry->key,
1592          .ptr  = entry->data,
1593       };
1594       util_dynarray_append(&arr, struct inst_ptr, inst);
1595    }
1596    qsort(util_dynarray_begin(&arr),
1597          util_dynarray_num_elements(&arr, struct inst_ptr),
1598          sizeof(struct inst_ptr),
1599          compare_inst_ptr);
1600 
1601    fprintf(ctx->fp, "----\n");
1602    util_dynarray_foreach(&arr, struct inst_ptr, i) {
1603       char *begin_color;
1604       char *end_color;
1605       get_inst_color(ctx, i->inst, &begin_color, &end_color);
1606 
1607       uint64_t offset = 0;
1608       fprintf(ctx->fp, "%s0x%08"PRIx64":  0x%08x:  %-80s%s\n",
1609               begin_color, offset, i->ptr[0], i->inst->name, end_color);
1610       if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
1611          ctx_print_group(ctx, i->inst, 0, i->ptr);
1612          for (int d = 0; d < ARRAY_SIZE(custom_decoders); d++) {
1613             if (strcmp(i->inst->name, custom_decoders[d].cmd_name) == 0) {
1614                custom_decoders[d].decode(ctx, i->ptr);
1615                break;
1616             }
1617          }
1618       }
1619    }
1620    util_dynarray_fini(&arr);
1621 }
1622 
1623 static void
1624 print_instr(struct intel_batch_decode_ctx *ctx,
1625             const struct intel_group *inst,
1626             const uint32_t *p,
1627             uint64_t offset)
1628 {
1629    char *begin_color;
1630    char *end_color;
1631    get_inst_color(ctx, inst, &begin_color, &end_color);
1632 
1633    fprintf(ctx->fp, "%s0x%08"PRIx64"%s:  0x%08x:  %-80s%s\n",
1634            begin_color, offset,
1635            ctx->acthd && offset == ctx->acthd ? " (ACTHD)" : "", p[0],
1636            inst->name, end_color);
1637 
1638    if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
1639       ctx_print_group(ctx, inst, offset, p);
1640 
1641       for (int i = 0; i < ARRAY_SIZE(custom_decoders); i++) {
1642          if (strcmp(inst->name, custom_decoders[i].cmd_name) == 0) {
1643             custom_decoders[i].decode(ctx, p);
1644             break;
1645          }
1646       }
1647    }
1648 }
1649 
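/* Decode and print every packet in a batch buffer.  MI_BATCH_BUFFER_START
 * packets are followed into the referenced buffer (with a depth limit of
 * 100 chained batches), and decoding of the current buffer ends at
 * MI_BATCH_BUFFER_END or at a chained, non-second-level
 * MI_BATCH_BUFFER_START.
 */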
1650 void
1651 intel_print_batch(struct intel_batch_decode_ctx *ctx,
1652                   const uint32_t *batch, uint32_t batch_size,
1653                   uint64_t batch_addr, bool from_ring)
1654 {
1655    const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
1656    int length;
1657    struct intel_group *inst;
1658    const char *reset_color = ctx->flags & INTEL_BATCH_DECODE_IN_COLOR ? NORMAL : "";
1659 
1660    if (ctx->n_batch_buffer_start >= 100) {
1661       fprintf(ctx->fp, "%s0x%08"PRIx64": Max batch buffer jumps exceeded%s\n",
1662               (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
1663               (ctx->flags & INTEL_BATCH_DECODE_OFFSETS) ? batch_addr : 0,
1664               reset_color);
1665       return;
1666    }
1667 
1668    ctx->n_batch_buffer_start++;
1669 
1670    for (p = batch; p < end; p += length) {
1671       inst = intel_ctx_find_instruction(ctx, p);
1672       length = intel_group_get_length(inst, p);
1673       assert(inst == NULL || length > 0);
1674       length = MAX2(1, length);
1675 
1676       uint64_t offset;
1677       if (ctx->flags & INTEL_BATCH_DECODE_OFFSETS)
1678          offset = batch_addr + ((char *)p - (char *)batch);
1679       else
1680          offset = 0;
1681 
1682       if (inst == NULL) {
1683          fprintf(ctx->fp, "%s0x%08"PRIx64": unknown instruction %08x%s\n",
1684                  (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
1685                  offset, p[0], reset_color);
1686 
1687          for (int i = 1; i < length; i++) {
1688             fprintf(ctx->fp, "%s0x%08"PRIx64": -- %08x%s\n",
1689                  (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
1690                  offset + i * 4, p[i], reset_color);
1691          }
1692 
1693          continue;
1694       }
1695 
1696       if (ctx->flags & INTEL_BATCH_DECODE_ACCUMULATE) {
1697          struct hash_entry *entry = _mesa_hash_table_search(ctx->commands, inst);
1698          if (entry != NULL) {
1699             entry->data = (void *)p;
1700          } else {
1701             _mesa_hash_table_insert(ctx->commands, inst, (void *)p);
1702          }
1703 
1704          if (!strcmp(inst->name, "3DPRIMITIVE") ||
1705              !strcmp(inst->name, "3DPRIMITIVE_EXTENDED") ||
1706              !strcmp(inst->name, "GPGPU_WALKER") ||
1707              !strcmp(inst->name, "3DSTATE_WM_HZ_OP") ||
1708              !strcmp(inst->name, "COMPUTE_WALKER")) {
1709             intel_print_accumulated_instrs(ctx);
1710          }
1711       } else if (ctx->filters != NULL) {
1712          if (_mesa_hash_table_search(ctx->filters, inst->name) != NULL)
1713             print_instr(ctx, inst, p, offset);
1714       } else {
1715          print_instr(ctx, inst, p, offset);
1716       }
1717 
1718       for (int i = 0; i < ARRAY_SIZE(state_handlers); i++) {
1719          if (strcmp(inst->name, state_handlers[i].cmd_name) == 0) {
1720             state_handlers[i].decode(ctx, p);
1721             break;
1722          }
1723       }
1724 
1725       if (strcmp(inst->name, "MI_BATCH_BUFFER_START") == 0) {
1726          uint64_t next_batch_addr = 0;
1727          bool ppgtt = false;
1728          bool second_level = false;
1729          bool predicate = false;
1730          struct intel_field_iterator iter;
1731          intel_field_iterator_init(&iter, inst, p, 0, false);
1732          while (intel_field_iterator_next(&iter)) {
1733             if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
1734                next_batch_addr = iter.raw_value;
1735             } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
1736                second_level = iter.raw_value;
1737             } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
1738                ppgtt = iter.raw_value;
1739             } else if (strcmp(iter.name, "Predication Enable") == 0) {
1740                predicate = iter.raw_value;
1741             }
1742          }
1743 
1744          if (!predicate) {
1745             struct intel_batch_decode_bo next_batch = ctx_get_bo(ctx, ppgtt, next_batch_addr);
1746 
1747             if (next_batch.map == NULL) {
1748                fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
1749                        next_batch_addr);
1750             } else {
1751                intel_print_batch(ctx, next_batch.map, next_batch.size,
1752                                  next_batch.addr, false);
1753             }
1754             if (second_level) {
1755                /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
1756                 * like a subroutine call.  Commands that come afterwards get
1757                 * processed once the 2nd level batch buffer returns with
1758                 * MI_BATCH_BUFFER_END.
1759                 */
1760                continue;
1761             } else if (!from_ring) {
1762                /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
1763                 * like a goto.  Nothing after it in this batch will ever get
1764                 * processed, and the chained batch was already decoded by the
1765                 * recursive call above, so simply stop here.
1766                 */
1767                break;
1768             }
1769          }
1770       } else if (strcmp(inst->name, "MI_BATCH_BUFFER_END") == 0) {
1771          break;
1772       }
1773    }
1774 
1775    ctx->n_batch_buffer_start--;
1776 }
1777 
1778 void
1779 intel_batch_stats_reset(struct intel_batch_decode_ctx *ctx)
1780 {
1781    _mesa_hash_table_clear(ctx->stats, NULL);
1782 }
1783 
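/* Like intel_print_batch(), but instead of printing each packet this walks
 * the batch (and any chained batches) and counts occurrences of each
 * command name in ctx->stats; intel_batch_print_stats() dumps the totals
 * and intel_batch_stats_reset() clears them.
 */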
1784 void
1785 intel_batch_stats(struct intel_batch_decode_ctx *ctx,
1786                   const uint32_t *batch, uint32_t batch_size,
1787                   uint64_t batch_addr, bool from_ring)
1788 {
1789    const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
1790    int length;
1791    struct intel_group *inst;
1792 
1793    if (ctx->n_batch_buffer_start >= 100) {
1794       fprintf(stderr, "Max batch buffer jumps exceeded\n");
1795       return;
1796    }
1797 
1798    ctx->n_batch_buffer_start++;
1799 
1800    for (p = batch; p < end; p += length) {
1801       inst = intel_ctx_find_instruction(ctx, p);
1802       length = intel_group_get_length(inst, p);
1803       assert(inst == NULL || length > 0);
1804       length = MAX2(1, length);
1805 
1806       const char *name =
1807          inst != NULL ? inst->name : "unknown";
1808 
1809       struct hash_entry *entry = _mesa_hash_table_search(ctx->stats, name);
1810       if (entry != NULL) {
1811          entry->data = (void *)((uintptr_t)entry->data + 1);
1812       } else {
1813          _mesa_hash_table_insert(ctx->stats, name, (void *)(uintptr_t)1);
1814       }
1815 
1816       if (inst == NULL)
1817          continue;
1818 
1819       if (strcmp(inst->name, "MI_BATCH_BUFFER_START") == 0) {
1820          uint64_t next_batch_addr = 0;
1821          bool ppgtt = false;
1822          bool second_level = false;
1823          bool predicate = false;
1824          struct intel_field_iterator iter;
1825          intel_field_iterator_init(&iter, inst, p, 0, false);
1826          while (intel_field_iterator_next(&iter)) {
1827             if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
1828                next_batch_addr = iter.raw_value;
1829             } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
1830                second_level = iter.raw_value;
1831             } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
1832                ppgtt = iter.raw_value;
1833             } else if (strcmp(iter.name, "Predication Enable") == 0) {
1834                predicate = iter.raw_value;
1835             }
1836          }
1837 
1838          if (!predicate) {
1839             struct intel_batch_decode_bo next_batch =
1840                ctx_get_bo(ctx, ppgtt, next_batch_addr);
1841 
1842             if (next_batch.map == NULL) {
1843                fprintf(stderr, "Secondary batch at 0x%08"PRIx64" unavailable\n",
1844                        next_batch_addr);
1845             } else {
1846                intel_batch_stats(ctx, next_batch.map, next_batch.size,
1847                                  next_batch.addr, false);
1848             }
1849             if (second_level) {
1850                /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
1851                 * like a subroutine call.  Commands that come afterwards get
1852                 * processed once the 2nd level batch buffer returns with
1853                 * MI_BATCH_BUFFER_END.
1854                 */
1855                continue;
1856             } else if (!from_ring) {
1857                /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
1858                 * like a goto.  Nothing after it in this batch will ever get
1859                 * processed, and the chained batch was already counted by the
1860                 * recursive call above, so simply stop here.
1861                 */
1862                break;
1863             }
1864          }
1865       } else if (strcmp(inst->name, "MI_BATCH_BUFFER_END") == 0) {
1866          break;
1867       }
1868    }
1869 
1870    ctx->n_batch_buffer_start--;
1871 }
1872 
1873 struct inst_stat {
1874    const char *name;
1875    uint32_t    count;
1876 };
1877 
1878 static int
1879 compare_inst_stat(const void *v1, const void *v2)
1880 {
1881    const struct inst_stat *i1 = v1, *i2 = v2;
1882    return strcmp(i1->name, i2->name);
1883 }
1884 
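/* Print the per-command counters gathered by intel_batch_stats(), sorted
 * alphabetically by command name.
 */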
1885 void
1886 intel_batch_print_stats(struct intel_batch_decode_ctx *ctx)
1887 {
1888    struct util_dynarray arr;
1889    util_dynarray_init(&arr, NULL);
1890 
1891    hash_table_foreach(ctx->stats, entry) {
1892       struct inst_stat inst = {
1893          .name = (const char *)entry->key,
1894          .count = (uintptr_t)entry->data,
1895       };
1896       util_dynarray_append(&arr, struct inst_stat, inst);
1897    }
1898    qsort(util_dynarray_begin(&arr),
1899          util_dynarray_num_elements(&arr, struct inst_stat),
1900          sizeof(struct inst_stat),
1901          compare_inst_stat);
1902    util_dynarray_foreach(&arr, struct inst_stat, i)
1903       fprintf(ctx->fp, "%-40s: %u\n", i->name, i->count);
1904 
1905    util_dynarray_fini(&arr);
1906 }
1907