/*
 * Copyright (C) 2017-2019 Alyssa Rosenzweig
 * Copyright (C) 2017-2019 Connor Abbott
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "genxml/gen_macros.h"
#include "util/set.h"
#include "decode.h"

#if PAN_ARCH <= 9

static void
pandecode_primitive(struct pandecode_context *ctx,
                    const struct mali_primitive_packed *p)
{
   pan_unpack(p, PRIMITIVE, primitive);
   DUMP_UNPACKED(ctx, PRIMITIVE, primitive, "Primitive:\n");

#if PAN_ARCH <= 7
   /* Validate an index buffer is present if we need one. TODO: verify
    * relationship between invocation_count and index_count */

   if (primitive.indices) {
      /* Grab the size */
      unsigned size = (primitive.index_type == MALI_INDEX_TYPE_UINT32)
                         ? sizeof(uint32_t)
                         : primitive.index_type;

      /* Ensure we got a size, and if so, validate the index buffer
       * is large enough to hold a full set of indices of the given
       * size */

      if (!size)
         pandecode_log(ctx, "// XXX: index size missing\n");
      else
         pandecode_validate_buffer(ctx, primitive.indices,
                                   primitive.index_count * size);
   } else if (primitive.index_type)
      pandecode_log(ctx, "// XXX: unexpected index size\n");
#endif
}

#if PAN_ARCH <= 7
static void
pandecode_attributes(struct pandecode_context *ctx, uint64_t addr, int count,
                     bool varying, enum mali_job_type job_type)
{
   const char *prefix = varying ? "Varying" : "Attribute";
   assert(addr);

   if (!count) {
      pandecode_log(ctx, "// warn: No %s records\n", prefix);
      return;
   }

   MAP_ADDR(ctx, ATTRIBUTE_BUFFER, addr, cl);

   for (int i = 0; i < count; ++i) {
      pan_unpack(&cl[i], ATTRIBUTE_BUFFER, temp);
      DUMP_UNPACKED(ctx, ATTRIBUTE_BUFFER, temp, "%s:\n", prefix);

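      /* NPOT-divisor and 3D buffer types occupy two records each: the base
       * ATTRIBUTE_BUFFER plus a continuation record carrying the extra
       * parameters, so after dumping the continuation we skip ahead an
       * extra slot.
       */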
      switch (temp.type) {
      case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR_WRITE_REDUCTION:
      case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR: {
         pan_cast_and_unpack(&cl[i + 1], ATTRIBUTE_BUFFER_CONTINUATION_NPOT,
                             temp2);
         pan_print(ctx->dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, temp2,
                   (ctx->indent + 1) * 2);
         i++;
         break;
      }
      case MALI_ATTRIBUTE_TYPE_3D_LINEAR:
      case MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED: {
         pan_cast_and_unpack(&cl[i + 1], ATTRIBUTE_BUFFER_CONTINUATION_3D,
                             temp2);
         pan_print(ctx->dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_3D, temp2,
                   (ctx->indent + 1) * 2);
         i++;
         break;
      }
      default:
         break;
      }
   }
   pandecode_log(ctx, "\n");
}

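/*
 * Dump each attribute/varying record and return the number of attribute
 * buffers referenced (the highest buffer index seen plus one), clamped to
 * 256 as a sanity bound on how many buffers are dumped afterwards.
 */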
static unsigned
pandecode_attribute_meta(struct pandecode_context *ctx, int count,
                         uint64_t attribute, bool varying)
{
   unsigned max = 0;

   for (int i = 0; i < count; ++i, attribute += pan_size(ATTRIBUTE)) {
      MAP_ADDR(ctx, ATTRIBUTE, attribute, cl);
      pan_unpack(cl, ATTRIBUTE, a);
      DUMP_UNPACKED(ctx, ATTRIBUTE, a, "%s:\n",
                    varying ? "Varying" : "Attribute");
      max = MAX2(max, a.buffer_index);
   }

   pandecode_log(ctx, "\n");
   return MIN2(max + 1, 256);
}

/* return bits [lo, hi) of word */
static uint32_t
bits(uint32_t word, uint32_t lo, uint32_t hi)
{
   if (hi - lo >= 32)
      return word; // avoid undefined behavior with the shift

   if (lo >= 32)
      return 0;

   return (word >> lo) & ((1u << (hi - lo)) - 1);
}

static void
pandecode_invocation(struct pandecode_context *ctx,
                     const struct mali_invocation_packed *i)
{
   /* Decode invocation_count. See the comment before the definition of
    * invocation_count for an explanation.
    */
   pan_unpack(i, INVOCATION, invocation);

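   /* The packed invocations word holds six biased fields laid out back to
    * back: each field stores (value - 1), and the *_shift fields in the
    * descriptor give the bit where the next field begins. So local size X
    * occupies bits [0, size_y_shift) and the workgroup Z count occupies
    * [workgroups_z_shift, 32).
    */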
   unsigned size_x =
      bits(invocation.invocations, 0, invocation.size_y_shift) + 1;
   unsigned size_y = bits(invocation.invocations, invocation.size_y_shift,
                          invocation.size_z_shift) +
                     1;
   unsigned size_z = bits(invocation.invocations, invocation.size_z_shift,
                          invocation.workgroups_x_shift) +
                     1;

   unsigned groups_x =
      bits(invocation.invocations, invocation.workgroups_x_shift,
           invocation.workgroups_y_shift) +
      1;
   unsigned groups_y =
      bits(invocation.invocations, invocation.workgroups_y_shift,
           invocation.workgroups_z_shift) +
      1;
   unsigned groups_z =
      bits(invocation.invocations, invocation.workgroups_z_shift, 32) + 1;

   pandecode_log(ctx, "Invocation (%d, %d, %d) x (%d, %d, %d)\n", size_x,
                 size_y, size_z, groups_x, groups_y, groups_z);

   DUMP_UNPACKED(ctx, INVOCATION, invocation, "Invocation:\n")
}

static void
pandecode_textures(struct pandecode_context *ctx, uint64_t textures,
                   unsigned texture_count)
{
   if (!textures)
      return;

   pandecode_log(ctx, "Textures %" PRIx64 ":\n", textures);
   ctx->indent++;

#if PAN_ARCH >= 6
   const void *cl =
      pandecode_fetch_gpu_mem(ctx, textures, pan_size(TEXTURE) * texture_count);

   for (unsigned tex = 0; tex < texture_count; ++tex)
      GENX(pandecode_texture)(ctx, cl + pan_size(TEXTURE) * tex, tex);
#else
   uint64_t *PANDECODE_PTR_VAR(ctx, u, textures);

   for (int tex = 0; tex < texture_count; ++tex) {
      uint64_t *PANDECODE_PTR_VAR(ctx, u, textures + tex * sizeof(uint64_t));
      char *a = pointer_as_memory_reference(ctx, *u);
      pandecode_log(ctx, "%s,\n", a);
      free(a);
   }

   /* Now, finally, descend down into the texture descriptor */
   for (unsigned tex = 0; tex < texture_count; ++tex) {
      uint64_t *PANDECODE_PTR_VAR(ctx, u, textures + tex * sizeof(uint64_t));
      GENX(pandecode_texture)(ctx, *u, tex);
   }
#endif
   ctx->indent--;
   pandecode_log(ctx, "\n");
}

static void
pandecode_samplers(struct pandecode_context *ctx, uint64_t samplers,
                   unsigned sampler_count)
{
   pandecode_log(ctx, "Samplers %" PRIx64 ":\n", samplers);
   ctx->indent++;

   for (int i = 0; i < sampler_count; ++i)
      DUMP_ADDR(ctx, SAMPLER, samplers + (pan_size(SAMPLER) * i),
                "Sampler %d:\n", i);

   ctx->indent--;
   pandecode_log(ctx, "\n");
}

static void
pandecode_uniform_buffers(struct pandecode_context *ctx, uint64_t pubufs,
                          int ubufs_count)
{
   uint64_t *PANDECODE_PTR_VAR(ctx, ubufs, pubufs);

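   /* Each 64-bit entry packs both the size and the address of a uniform
    * buffer, as decoded below: the low 10 bits hold the size in 16-byte
    * units minus one, and the remaining bits, shifted left by 2, give the
    * base address. For example, a (hypothetical) entry of 0x10000003F
    * decodes to a 1024-byte buffer at address 0x1000000.
    */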
   for (int i = 0; i < ubufs_count; i++) {
      uint64_t addr = (ubufs[i] >> 10) << 2;
      unsigned size = addr ? (((ubufs[i] & ((1 << 10) - 1)) + 1) * 16) : 0;

      pandecode_validate_buffer(ctx, addr, size);

      char *ptr = pointer_as_memory_reference(ctx, addr);
      pandecode_log(ctx, "ubuf_%d[%u] = %s;\n", i, size, ptr);
      free(ptr);
   }

   pandecode_log(ctx, "\n");
}

static void
pandecode_uniforms(struct pandecode_context *ctx, uint64_t uniforms,
                   unsigned uniform_count)
{
   pandecode_validate_buffer(ctx, uniforms, uniform_count * 16);

   char *ptr = pointer_as_memory_reference(ctx, uniforms);
   pandecode_log(ctx, "vec4 uniforms[%u] = %s;\n", uniform_count, ptr);
   free(ptr);
   pandecode_log(ctx, "\n");
}

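/*
 * Decode a DCD and everything it points to: the renderer state (including
 * shader disassembly and blend descriptors), attribute and varying buffers,
 * uniform buffers, push uniforms, textures, and samplers.
 */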
void
GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p,
                    enum mali_job_type job_type, unsigned gpu_id)
{
#if PAN_ARCH >= 5
   struct pandecode_fbd fbd_info = {.rt_count = 1};
#endif

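   /* On v6+, and on v5 for anything but tiler jobs, the descriptor points at
    * thread-local storage. v5 tiler jobs and all v4 jobs instead point at a
    * framebuffer descriptor (tagged with metadata on v5, untagged on v4).
    */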
   if (PAN_ARCH >= 6 || (PAN_ARCH == 5 && job_type != MALI_JOB_TYPE_TILER)) {
#if PAN_ARCH >= 5
      DUMP_ADDR(ctx, LOCAL_STORAGE, p->thread_storage & ~1, "Local Storage:\n");
#endif
   } else {
#if PAN_ARCH == 5
      /* On v5 only, the actual framebuffer pointer is tagged with extra
       * metadata that we validate but do not print.
       */
      const uint64_t *fbd = &p->fbd;
      pan_cast_and_unpack(fbd, FRAMEBUFFER_POINTER, ptr);

      if (!ptr.type || ptr.zs_crc_extension_present ||
          ptr.render_target_count != 1) {

         fprintf(ctx->dump_stream, "Unexpected framebuffer pointer settings\n");
      }

      GENX(pandecode_fbd)(ctx, ptr.pointer, false, gpu_id);
#elif PAN_ARCH == 4
      GENX(pandecode_fbd)(ctx, p->fbd, false, gpu_id);
#endif
   }

   int varying_count = 0, attribute_count = 0, uniform_count = 0,
       uniform_buffer_count = 0;
   int texture_count = 0, sampler_count = 0;

   if (p->state) {
      struct mali_renderer_state_packed *cl =
         pandecode_fetch_gpu_mem(ctx, p->state, pan_size(RENDERER_STATE));

      pan_unpack(cl, RENDERER_STATE, state);

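      /* The low four bits of the shader pointer appear to carry flag bits
       * rather than address bits, so mask them off before fetching the code
       * for disassembly.
       */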
      if (state.shader.shader & ~0xF)
         pandecode_shader_disassemble(ctx, state.shader.shader & ~0xF, gpu_id);

#if PAN_ARCH >= 6
      bool idvs = (job_type == MALI_JOB_TYPE_INDEXED_VERTEX);

      if (idvs && state.secondary_shader)
         pandecode_shader_disassemble(ctx, state.secondary_shader, gpu_id);
#endif
      DUMP_UNPACKED(ctx, RENDERER_STATE, state, "State:\n");
      ctx->indent++;

      /* Save for dumps */
      attribute_count = state.shader.attribute_count;
      varying_count = state.shader.varying_count;
      texture_count = state.shader.texture_count;
      sampler_count = state.shader.sampler_count;
      uniform_buffer_count = state.properties.uniform_buffer_count;

#if PAN_ARCH >= 6
      uniform_count = state.preload.uniform_count;
#else
      uniform_count = state.properties.uniform_count;
#endif

#if PAN_ARCH == 4
      uint64_t shader = state.blend_shader & ~0xF;
      if (state.multisample_misc.blend_shader && shader)
         pandecode_shader_disassemble(ctx, shader, gpu_id);
#endif
      ctx->indent--;
      pandecode_log(ctx, "\n");

      /* MRT blend fields are used on v5+. Technically, they are optional on v5
       * for backwards compatibility but we don't care about that.
       */
#if PAN_ARCH >= 5
      if (job_type == MALI_JOB_TYPE_TILER ||
          job_type == MALI_JOB_TYPE_FRAGMENT) {
         void *blend_base = ((void *)cl) + pan_size(RENDERER_STATE);

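         /* The blend descriptors sit directly after the renderer state in
          * memory; decode one per render target and disassemble any blend
          * shader it points at.
          */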
         for (unsigned i = 0; i < fbd_info.rt_count; i++) {
            uint64_t shader =
               GENX(pandecode_blend)(ctx, blend_base, i, state.shader.shader);
            if (shader & ~0xF)
               pandecode_shader_disassemble(ctx, shader, gpu_id);
         }
      }
#endif
   } else
      pandecode_log(ctx, "// XXX: missing shader descriptor\n");

   if (p->viewport) {
      DUMP_ADDR(ctx, VIEWPORT, p->viewport, "Viewport:\n");
      pandecode_log(ctx, "\n");
   }

   unsigned max_attr_index = 0;

   if (p->attributes)
      max_attr_index =
         pandecode_attribute_meta(ctx, attribute_count, p->attributes, false);

   if (p->attribute_buffers)
      pandecode_attributes(ctx, p->attribute_buffers, max_attr_index, false,
                           job_type);

   if (p->varyings) {
      varying_count =
         pandecode_attribute_meta(ctx, varying_count, p->varyings, true);
   }

   if (p->varying_buffers)
      pandecode_attributes(ctx, p->varying_buffers, varying_count, true,
                           job_type);

   if (p->uniform_buffers) {
      if (uniform_buffer_count)
         pandecode_uniform_buffers(ctx, p->uniform_buffers,
                                   uniform_buffer_count);
      else
         pandecode_log(ctx, "// warn: UBOs specified but not referenced\n");
   } else if (uniform_buffer_count)
      pandecode_log(ctx, "// XXX: UBOs referenced but not specified\n");

   /* We don't want to actually dump uniforms, but we do need to validate
    * that the counts we were given are sane */

   if (p->push_uniforms) {
      if (uniform_count)
         pandecode_uniforms(ctx, p->push_uniforms, uniform_count);
      else
         pandecode_log(ctx, "// warn: Uniforms specified but not referenced\n");
   } else if (uniform_count)
      pandecode_log(ctx, "// XXX: Uniforms referenced but not specified\n");

   if (p->textures)
      pandecode_textures(ctx, p->textures, texture_count);

   if (p->samplers)
      pandecode_samplers(ctx, p->samplers, sampler_count);
}

static void
pandecode_vertex_compute_geometry_job(struct pandecode_context *ctx,
                                      const struct MALI_JOB_HEADER *h,
                                      uint64_t job, unsigned gpu_id)
{
   struct mali_compute_job_packed *PANDECODE_PTR_VAR(ctx, p, job);
   pan_section_unpack(p, COMPUTE_JOB, DRAW, draw);
   GENX(pandecode_dcd)(ctx, &draw, h->type, gpu_id);

   pandecode_log(ctx, "Vertex Job Payload:\n");
   ctx->indent++;
   pandecode_invocation(ctx, pan_section_ptr(p, COMPUTE_JOB, INVOCATION));
   DUMP_SECTION(ctx, COMPUTE_JOB, PARAMETERS, p, "Vertex Job Parameters:\n");
   DUMP_UNPACKED(ctx, DRAW, draw, "Draw:\n");
   ctx->indent--;
   pandecode_log(ctx, "\n");
}
#endif

static void
pandecode_write_value_job(struct pandecode_context *ctx, uint64_t job)
{
   struct mali_write_value_job_packed *PANDECODE_PTR_VAR(ctx, p, job);
   pan_section_unpack(p, WRITE_VALUE_JOB, PAYLOAD, u);
   DUMP_SECTION(ctx, WRITE_VALUE_JOB, PAYLOAD, p, "Write Value Payload:\n");
   pandecode_log(ctx, "\n");
}

static void
pandecode_cache_flush_job(struct pandecode_context *ctx, uint64_t job)
{
   struct mali_cache_flush_job_packed *PANDECODE_PTR_VAR(ctx, p, job);
   pan_section_unpack(p, CACHE_FLUSH_JOB, PAYLOAD, u);
   DUMP_SECTION(ctx, CACHE_FLUSH_JOB, PAYLOAD, p, "Cache Flush Payload:\n");
   pandecode_log(ctx, "\n");
}

static void
pandecode_tiler_job(struct pandecode_context *ctx,
                    const struct MALI_JOB_HEADER *h, uint64_t job,
                    unsigned gpu_id)
{
   struct mali_tiler_job_packed *PANDECODE_PTR_VAR(ctx, p, job);
   pan_section_unpack(p, TILER_JOB, DRAW, draw);
   GENX(pandecode_dcd)(ctx, &draw, h->type, gpu_id);
   pandecode_log(ctx, "Tiler Job Payload:\n");
   ctx->indent++;

#if PAN_ARCH <= 7
   pandecode_invocation(ctx, pan_section_ptr(p, TILER_JOB, INVOCATION));
#endif

   pandecode_primitive(ctx, pan_section_ptr(p, TILER_JOB, PRIMITIVE));
   DUMP_UNPACKED(ctx, DRAW, draw, "Draw:\n");

   DUMP_SECTION(ctx, TILER_JOB, PRIMITIVE_SIZE, p, "Primitive Size:\n");

#if PAN_ARCH >= 6
   pan_section_unpack(p, TILER_JOB, TILER, tiler_ptr);
   GENX(pandecode_tiler)(ctx, tiler_ptr.address, gpu_id);

#if PAN_ARCH >= 9
   DUMP_SECTION(ctx, TILER_JOB, INSTANCE_COUNT, p, "Instance count:\n");
   DUMP_SECTION(ctx, TILER_JOB, VERTEX_COUNT, p, "Vertex count:\n");
   DUMP_SECTION(ctx, TILER_JOB, SCISSOR, p, "Scissor:\n");
   DUMP_SECTION(ctx, TILER_JOB, INDICES, p, "Indices:\n");
#else
   pan_section_unpack(p, TILER_JOB, PADDING, padding);
#endif

#endif
   ctx->indent--;
   pandecode_log(ctx, "\n");
}

static void
pandecode_fragment_job(struct pandecode_context *ctx, uint64_t job,
                       unsigned gpu_id)
{
   struct mali_fragment_job_packed *PANDECODE_PTR_VAR(ctx, p, job);
   pan_section_unpack(p, FRAGMENT_JOB, PAYLOAD, s);

   uint64_t fbd_pointer;

#if PAN_ARCH >= 5
   /* On v5 and newer, the actual framebuffer pointer is tagged with extra
    * metadata that we need to disregard.
    */
   const uint64_t *framebuffer_packed_raw = &s.framebuffer;
   pan_cast_and_unpack(framebuffer_packed_raw, FRAMEBUFFER_POINTER, ptr);
   fbd_pointer = ptr.pointer;
#else
   /* On v4, the framebuffer pointer is untagged. */
   fbd_pointer = s.framebuffer;
#endif

   UNUSED struct pandecode_fbd info =
      GENX(pandecode_fbd)(ctx, fbd_pointer, true, gpu_id);

#if PAN_ARCH >= 5
   if (!ptr.type || ptr.zs_crc_extension_present != info.has_extra ||
       ptr.render_target_count != info.rt_count) {
      pandecode_log(ctx, "invalid FBD tag\n");
   }
#endif

   DUMP_UNPACKED(ctx, FRAGMENT_JOB_PAYLOAD, s, "Fragment Job Payload:\n");

   pandecode_log(ctx, "\n");
}

#if PAN_ARCH == 6 || PAN_ARCH == 7
static void
pandecode_indexed_vertex_job(struct pandecode_context *ctx,
                             const struct MALI_JOB_HEADER *h, uint64_t job,
                             unsigned gpu_id)
{
   struct mali_indexed_vertex_job_packed *PANDECODE_PTR_VAR(ctx, p, job);

   pandecode_log(ctx, "Vertex:\n");
   pan_section_unpack(p, INDEXED_VERTEX_JOB, VERTEX_DRAW, vert_draw);
   GENX(pandecode_dcd)(ctx, &vert_draw, h->type, gpu_id);
   DUMP_UNPACKED(ctx, DRAW, vert_draw, "Vertex Draw:\n");

   pandecode_log(ctx, "Fragment:\n");
   pan_section_unpack(p, INDEXED_VERTEX_JOB, FRAGMENT_DRAW, frag_draw);
   GENX(pandecode_dcd)(ctx, &frag_draw, MALI_JOB_TYPE_FRAGMENT, gpu_id);
   DUMP_UNPACKED(ctx, DRAW, frag_draw, "Fragment Draw:\n");

   pan_section_unpack(p, INDEXED_VERTEX_JOB, TILER, tiler_ptr);
   pandecode_log(ctx, "Tiler Job Payload:\n");
   ctx->indent++;
   GENX(pandecode_tiler)(ctx, tiler_ptr.address, gpu_id);
   ctx->indent--;

   pandecode_invocation(ctx,
                        pan_section_ptr(p, INDEXED_VERTEX_JOB, INVOCATION));
   pandecode_primitive(ctx, pan_section_ptr(p, INDEXED_VERTEX_JOB, PRIMITIVE));

   DUMP_SECTION(ctx, INDEXED_VERTEX_JOB, PRIMITIVE_SIZE, p,
                "Primitive Size:\n");

   pan_section_unpack(p, INDEXED_VERTEX_JOB, PADDING, padding);
}
#endif

#if PAN_ARCH == 9
static void
pandecode_malloc_vertex_job(struct pandecode_context *ctx, uint64_t job,
                            unsigned gpu_id)
{
   struct mali_malloc_vertex_job_packed *PANDECODE_PTR_VAR(ctx, p, job);

   DUMP_SECTION(ctx, MALLOC_VERTEX_JOB, PRIMITIVE, p, "Primitive:\n");
   DUMP_SECTION(ctx, MALLOC_VERTEX_JOB, INSTANCE_COUNT, p, "Instance count:\n");
   DUMP_SECTION(ctx, MALLOC_VERTEX_JOB, ALLOCATION, p, "Allocation:\n");
   DUMP_SECTION(ctx, MALLOC_VERTEX_JOB, TILER, p, "Tiler:\n");
   DUMP_SECTION(ctx, MALLOC_VERTEX_JOB, SCISSOR, p, "Scissor:\n");
   DUMP_SECTION(ctx, MALLOC_VERTEX_JOB, PRIMITIVE_SIZE, p, "Primitive Size:\n");
   DUMP_SECTION(ctx, MALLOC_VERTEX_JOB, INDICES, p, "Indices:\n");

   pan_section_unpack(p, MALLOC_VERTEX_JOB, DRAW, dcd);

   pan_section_unpack(p, MALLOC_VERTEX_JOB, TILER, tiler_ptr);
   pandecode_log(ctx, "Tiler Job Payload:\n");
   ctx->indent++;
   if (tiler_ptr.address)
      GENX(pandecode_tiler)(ctx, tiler_ptr.address, gpu_id);
   else
      pandecode_log(ctx, "<omitted>\n");
   ctx->indent--;

   GENX(pandecode_dcd)(ctx, &dcd, 0, gpu_id);

   pan_section_unpack(p, MALLOC_VERTEX_JOB, POSITION, position);
   pan_section_unpack(p, MALLOC_VERTEX_JOB, VARYING, varying);
   GENX(pandecode_shader_environment)(ctx, &position, gpu_id);
   GENX(pandecode_shader_environment)(ctx, &varying, gpu_id);
}

static void
pandecode_compute_job(struct pandecode_context *ctx, uint64_t job,
                      unsigned gpu_id)
{
   struct mali_compute_job_packed *PANDECODE_PTR_VAR(ctx, p, job);
   pan_section_unpack(p, COMPUTE_JOB, PAYLOAD, payload);

   GENX(pandecode_shader_environment)(ctx, &payload.compute, gpu_id);
   DUMP_SECTION(ctx, COMPUTE_JOB, PAYLOAD, p, "Compute");
}
#endif

/*
 * Trace a job chain at a particular GPU address, interpreted for a particular
 * GPU using the job manager.
 */
void
GENX(pandecode_jc)(struct pandecode_context *ctx, uint64_t jc_gpu_va,
                   unsigned gpu_id)
{
   pandecode_dump_file_open(ctx);

   struct set *va_set = _mesa_pointer_set_create(NULL);
   struct set_entry *entry = NULL;

   uint64_t next_job = 0;

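   /* Walk the chain of job headers: each header's next field gives the GPU
    * address of the following job. The CPU pointers of headers we have
    * already decoded are collected in va_set so a malformed chain containing
    * a cycle still terminates.
    */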
   do {
      struct mali_job_header_packed *hdr =
         PANDECODE_PTR(ctx, jc_gpu_va, struct mali_job_header_packed);

      entry = _mesa_set_search(va_set, hdr);
      if (entry != NULL) {
         fprintf(stdout, "Job list has a cycle\n");
         break;
      }

      pan_unpack(hdr, JOB_HEADER, h);
      next_job = h.next;

      DUMP_UNPACKED(ctx, JOB_HEADER, h, "Job Header (%" PRIx64 "):\n",
                    jc_gpu_va);
      pandecode_log(ctx, "\n");

      switch (h.type) {
      case MALI_JOB_TYPE_WRITE_VALUE:
         pandecode_write_value_job(ctx, jc_gpu_va);
         break;

      case MALI_JOB_TYPE_CACHE_FLUSH:
         pandecode_cache_flush_job(ctx, jc_gpu_va);
         break;

      case MALI_JOB_TYPE_TILER:
         pandecode_tiler_job(ctx, &h, jc_gpu_va, gpu_id);
         break;

#if PAN_ARCH <= 7
      case MALI_JOB_TYPE_VERTEX:
      case MALI_JOB_TYPE_COMPUTE:
         pandecode_vertex_compute_geometry_job(ctx, &h, jc_gpu_va, gpu_id);
         break;

#if PAN_ARCH >= 6
      case MALI_JOB_TYPE_INDEXED_VERTEX:
         pandecode_indexed_vertex_job(ctx, &h, jc_gpu_va, gpu_id);
         break;
#endif
#else
      case MALI_JOB_TYPE_COMPUTE:
         pandecode_compute_job(ctx, jc_gpu_va, gpu_id);
         break;

      case MALI_JOB_TYPE_MALLOC_VERTEX:
         pandecode_malloc_vertex_job(ctx, jc_gpu_va, gpu_id);
         break;
#endif

      case MALI_JOB_TYPE_FRAGMENT:
         pandecode_fragment_job(ctx, jc_gpu_va, gpu_id);
         break;

      default:
         break;
      }

      /* Remember this job's CPU VA so that cycles in the chain are caught by
       * the check above */
      _mesa_set_add(va_set, hdr);
   } while ((jc_gpu_va = next_job));

   _mesa_set_destroy(va_set, NULL);

   fflush(ctx->dump_stream);
   pandecode_map_read_write(ctx);
}

void
GENX(pandecode_abort_on_fault)(struct pandecode_context *ctx,
                               uint64_t jc_gpu_va)
{
   uint64_t next_job = 0;

   do {
      pan_unpack(PANDECODE_PTR(ctx, jc_gpu_va, struct mali_job_header_packed),
                 JOB_HEADER, h);
      next_job = h.next;

      /* Ensure the job is marked COMPLETE */
      if (h.exception_status != 0x1) {
         fprintf(stderr, "Incomplete job or timeout\n");
         fflush(NULL);
         abort();
      }
   } while ((jc_gpu_va = next_job));

   pandecode_map_read_write(ctx);
}

#endif