1 /*
2 * Copyright © 2016 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include "drm-uapi/v3d_drm.h"
28 #include "clif_dump.h"
29 #include "clif_private.h"
30 #include "util/list.h"
31 #include "util/ralloc.h"
32
33 #include "broadcom/cle/v3d_decoder.h"
34
35 struct reloc_worklist_entry *
clif_dump_add_address_to_worklist(struct clif_dump * clif,enum reloc_worklist_type type,uint32_t addr)36 clif_dump_add_address_to_worklist(struct clif_dump *clif,
37 enum reloc_worklist_type type,
38 uint32_t addr)
39 {
40 struct reloc_worklist_entry *entry =
41 rzalloc(clif, struct reloc_worklist_entry);
42 if (!entry)
43 return NULL;
44
45 entry->type = type;
46 entry->addr = addr;
47
48 list_addtail(&entry->link, &clif->worklist);
49
50 return entry;
51 }
52
53 struct clif_dump *
clif_dump_init(const struct v3d_device_info * devinfo,FILE * out,bool pretty,bool nobin)54 clif_dump_init(const struct v3d_device_info *devinfo,
55 FILE *out, bool pretty, bool nobin)
56 {
57 struct clif_dump *clif = rzalloc(NULL, struct clif_dump);
58
59 clif->devinfo = devinfo;
60 clif->out = out;
61 clif->spec = v3d_spec_load(devinfo);
62 clif->pretty = pretty;
63 clif->nobin = nobin;
64
65 list_inithead(&clif->worklist);
66
67 return clif;
68 }
69
/**
 * Releases a dump context created by clif_dump_init().
 *
 * The context is handed to ralloc_free().  Allocations made with \p clif as
 * their ralloc context (worklist entries, the BO array, BO names) are
 * presumably released along with it -- confirm against the ralloc
 * documentation.
 */
void
clif_dump_destroy(struct clif_dump *clif)
{
        ralloc_free(clif);
}
75
76 struct clif_bo *
clif_lookup_bo(struct clif_dump * clif,uint32_t addr)77 clif_lookup_bo(struct clif_dump *clif, uint32_t addr)
78 {
79 for (int i = 0; i < clif->bo_count; i++) {
80 struct clif_bo *bo = &clif->bo[i];
81
82 if (addr >= bo->offset &&
83 addr < bo->offset + bo->size) {
84 return bo;
85 }
86 }
87
88 return NULL;
89 }
90
91 static bool
clif_lookup_vaddr(struct clif_dump * clif,uint32_t addr,void ** vaddr)92 clif_lookup_vaddr(struct clif_dump *clif, uint32_t addr, void **vaddr)
93 {
94 struct clif_bo *bo = clif_lookup_bo(clif, addr);
95 if (!bo)
96 return false;
97
98 *vaddr = bo->vaddr + addr - bo->offset;
99 return true;
100 }
101
/* Prints one field of `values` as an unsigned integer, labelled with the
 * field's name in a comment.  A variable named `values` (pointer to a struct
 * containing `field`) must be in scope where the macro is expanded.
 *
 * NOTE(review): the expansion already ends in a semicolon, so writing
 * `out_uint(clif, f);` produces an extra empty statement -- take care when
 * using it as the sole body of an unbraced if/else.
 */
#define out_uint(_clif, field) out(_clif, " /* %s = */ %u\n", \
                        #field, values-> field);
104
105 static bool
clif_dump_packet(struct clif_dump * clif,uint32_t offset,const uint8_t * cl,uint32_t * size,bool reloc_mode)106 clif_dump_packet(struct clif_dump *clif, uint32_t offset, const uint8_t *cl,
107 uint32_t *size, bool reloc_mode)
108 {
109 if (clif->devinfo->ver >= 42)
110 return v3d42_clif_dump_packet(clif, offset, cl, size, reloc_mode);
111 else if (clif->devinfo->ver >= 41)
112 return v3d41_clif_dump_packet(clif, offset, cl, size, reloc_mode);
113 else
114 return v3d33_clif_dump_packet(clif, offset, cl, size, reloc_mode);
115 }
116
117 static uint32_t
clif_dump_cl(struct clif_dump * clif,uint32_t start,uint32_t end,bool reloc_mode)118 clif_dump_cl(struct clif_dump *clif, uint32_t start, uint32_t end,
119 bool reloc_mode)
120 {
121 struct clif_bo *bo = clif_lookup_bo(clif, start);
122 if (!bo) {
123 out(clif, "Failed to look up address 0x%08x\n",
124 start);
125 return 0;
126 }
127
128 void *start_vaddr = bo->vaddr + start - bo->offset;
129
130 /* The end address is optional (for example, a BRANCH instruction
131 * won't set an end), but is used for BCL/RCL termination.
132 */
133 void *end_vaddr = NULL;
134 if (end && !clif_lookup_vaddr(clif, end, &end_vaddr)) {
135 out(clif, "Failed to look up address 0x%08x\n",
136 end);
137 return 0;
138 }
139
140 if (!reloc_mode)
141 out(clif, "@format ctrllist /* [%s+0x%08x] */\n",
142 bo->name, start - bo->offset);
143
144 uint32_t size;
145 uint8_t *cl = start_vaddr;
146 while (clif_dump_packet(clif, start, cl, &size, reloc_mode)) {
147 cl += size;
148 start += size;
149
150 if (cl == end_vaddr)
151 break;
152 }
153
154 return (void *)cl - bo->vaddr;
155 }
156
157 /* Walks the worklist, parsing the relocs for any memory regions that might
158 * themselves have additional relocations.
159 */
160 static uint32_t
clif_dump_gl_shader_state_record(struct clif_dump * clif,struct reloc_worklist_entry * reloc,void * vaddr,bool including_gs)161 clif_dump_gl_shader_state_record(struct clif_dump *clif,
162 struct reloc_worklist_entry *reloc,
163 void *vaddr,
164 bool including_gs)
165 {
166 struct v3d_group *state = v3d_spec_find_struct(clif->spec,
167 "GL Shader State Record");
168 struct v3d_group *attr = v3d_spec_find_struct(clif->spec,
169 "GL Shader State Attribute Record");
170 assert(state);
171 assert(attr);
172 uint32_t offset = 0;
173
174 if (including_gs) {
175 struct v3d_group *gs_state = v3d_spec_find_struct(clif->spec,
176 "Geometry Shader State Record");
177 assert(gs_state);
178 out(clif, "@format shadrec_gl_geom\n");
179 v3d_print_group(clif, gs_state, 0, vaddr + offset);
180 offset += v3d_group_get_length(gs_state);
181 /* Extra pad when geometry/tessellation shader is present */
182 offset += 20;
183 }
184 out(clif, "@format shadrec_gl_main\n");
185 v3d_print_group(clif, state, 0, vaddr + offset);
186 offset += v3d_group_get_length(state);
187
188 for (int i = 0; i < reloc->shader_state.num_attrs; i++) {
189 out(clif, "@format shadrec_gl_attr /* %d */\n", i);
190 v3d_print_group(clif, attr, 0, vaddr + offset);
191 offset += v3d_group_get_length(attr);
192 }
193
194 return offset;
195 }
196
197 static void
clif_process_worklist(struct clif_dump * clif)198 clif_process_worklist(struct clif_dump *clif)
199 {
200 list_for_each_entry_safe(struct reloc_worklist_entry, reloc,
201 &clif->worklist, link) {
202 void *vaddr;
203 if (!clif_lookup_vaddr(clif, reloc->addr, &vaddr)) {
204 out(clif, "Failed to look up address 0x%08x\n",
205 reloc->addr);
206 continue;
207 }
208
209 switch (reloc->type) {
210 case reloc_cl:
211 clif_dump_cl(clif, reloc->addr, reloc->cl.end, true);
212 break;
213
214 case reloc_gl_shader_state:
215 case reloc_gl_including_gs_shader_state:
216 break;
217 case reloc_generic_tile_list:
218 clif_dump_cl(clif, reloc->addr,
219 reloc->generic_tile_list.end, true);
220 break;
221 }
222 }
223 }
224
225 static int
worklist_entry_compare(const void * a,const void * b)226 worklist_entry_compare(const void *a, const void *b)
227 {
228 return ((*(struct reloc_worklist_entry **)a)->addr -
229 (*(struct reloc_worklist_entry **)b)->addr);
230 }
231
232 static bool
clif_dump_if_blank(struct clif_dump * clif,struct clif_bo * bo,uint32_t start,uint32_t end)233 clif_dump_if_blank(struct clif_dump *clif, struct clif_bo *bo,
234 uint32_t start, uint32_t end)
235 {
236 for (int i = start; i < end; i++) {
237 if (((uint8_t *)bo->vaddr)[i] != 0)
238 return false;
239 }
240
241 out(clif, "\n");
242 out(clif, "@format blank %d /* [%s+0x%08x..0x%08x] */\n", end - start,
243 bo->name, start, end - 1);
244 return true;
245 }
246
247 /* Dumps the binary data in the BO from start to end (relative to the start of
248 * the BO).
249 */
250 static void
clif_dump_binary(struct clif_dump * clif,struct clif_bo * bo,uint32_t start,uint32_t end)251 clif_dump_binary(struct clif_dump *clif, struct clif_bo *bo,
252 uint32_t start, uint32_t end)
253 {
254 if (clif->pretty && clif->nobin)
255 return;
256
257 if (start == end)
258 return;
259
260 if (clif_dump_if_blank(clif, bo, start, end))
261 return;
262
263 out(clif, "@format binary /* [%s+0x%08x] */\n",
264 bo->name, start);
265
266 uint32_t offset = start;
267 int dumped_in_line = 0;
268 while (offset < end) {
269 if (clif_dump_if_blank(clif, bo, offset, end))
270 return;
271
272 if (end - offset >= 4) {
273 out(clif, "0x%08x ", *(uint32_t *)(bo->vaddr + offset));
274 offset += 4;
275 } else {
276 out(clif, "0x%02x ", *(uint8_t *)(bo->vaddr + offset));
277 offset++;
278 }
279
280 if (++dumped_in_line == 8) {
281 out(clif, "\n");
282 dumped_in_line = 0;
283 }
284 }
285 if (dumped_in_line)
286 out(clif, "\n");
287 }
288
289 /* Walks the list of relocations, dumping each buffer's contents (using our
290 * codegenned dump routines for pretty printing, and most importantly proper
291 * address references so that the CLIF parser can relocate buffers).
292 */
293 static void
clif_dump_buffers(struct clif_dump * clif)294 clif_dump_buffers(struct clif_dump *clif)
295 {
296 int num_relocs = 0;
297 list_for_each_entry(struct reloc_worklist_entry, reloc,
298 &clif->worklist, link) {
299 num_relocs++;
300 }
301 struct reloc_worklist_entry **relocs =
302 ralloc_array(clif, struct reloc_worklist_entry *, num_relocs);
303 int i = 0;
304 list_for_each_entry(struct reloc_worklist_entry, reloc,
305 &clif->worklist, link) {
306 relocs[i++] = reloc;
307 }
308 qsort(relocs, num_relocs, sizeof(*relocs), worklist_entry_compare);
309
310 struct clif_bo *bo = NULL;
311 uint32_t offset = 0;
312
313 for (i = 0; i < num_relocs; i++) {
314 struct reloc_worklist_entry *reloc = relocs[i];
315 struct clif_bo *new_bo = clif_lookup_bo(clif, reloc->addr);
316
317 if (!new_bo) {
318 out(clif, "Failed to look up address 0x%08x\n",
319 reloc->addr);
320 continue;
321 }
322
323 if (new_bo != bo) {
324 if (bo) {
325 /* Finish out the last of the last BO. */
326 clif_dump_binary(clif, bo,
327 offset,
328 bo->size);
329 }
330
331 out(clif, "\n");
332 out(clif, "@buffer %s\n", new_bo->name);
333 bo = new_bo;
334 offset = 0;
335 bo->dumped = true;
336 }
337
338 int reloc_offset = reloc->addr - bo->offset;
339 if (offset != reloc_offset)
340 clif_dump_binary(clif, bo, offset, reloc_offset);
341 offset = reloc_offset;
342
343 switch (reloc->type) {
344 case reloc_cl:
345 offset = clif_dump_cl(clif, reloc->addr, reloc->cl.end,
346 false);
347 out(clif, "\n");
348 break;
349
350 case reloc_gl_shader_state:
351 case reloc_gl_including_gs_shader_state:
352 offset += clif_dump_gl_shader_state_record(clif,
353 reloc,
354 bo->vaddr +
355 offset,
356 reloc->type == reloc_gl_including_gs_shader_state);
357 break;
358 case reloc_generic_tile_list:
359 offset = clif_dump_cl(clif, reloc->addr,
360 reloc->generic_tile_list.end,
361 false);
362 break;
363 }
364 out(clif, "\n");
365 }
366
367 if (bo) {
368 clif_dump_binary(clif, bo, offset, bo->size);
369 }
370
371 /* For any BOs that didn't have relocations, just dump them raw. */
372 for (int i = 0; i < clif->bo_count; i++) {
373 bo = &clif->bo[i];
374 if (bo->dumped)
375 continue;
376 out(clif, "@buffer %s\n", bo->name);
377 clif_dump_binary(clif, bo, 0, bo->size);
378 out(clif, "\n");
379 }
380 }
381
382 void
clif_dump_add_cl(struct clif_dump * clif,uint32_t start,uint32_t end)383 clif_dump_add_cl(struct clif_dump *clif, uint32_t start, uint32_t end)
384 {
385 struct reloc_worklist_entry *entry =
386 clif_dump_add_address_to_worklist(clif, reloc_cl, start);
387
388 entry->cl.end = end;
389 }
390
391 static int
clif_bo_offset_compare(const void * a,const void * b)392 clif_bo_offset_compare(const void *a, const void *b)
393 {
394 return ((struct clif_bo *)a)->offset - ((struct clif_bo *)b)->offset;
395 }
396
397 void
clif_dump(struct clif_dump * clif,const struct drm_v3d_submit_cl * submit)398 clif_dump(struct clif_dump *clif, const struct drm_v3d_submit_cl *submit)
399 {
400 clif_dump_add_cl(clif, submit->bcl_start, submit->bcl_end);
401 clif_dump_add_cl(clif, submit->rcl_start, submit->rcl_end);
402
403 qsort(clif->bo, clif->bo_count, sizeof(clif->bo[0]),
404 clif_bo_offset_compare);
405
406 /* A buffer needs to be defined before we can emit a CLIF address
407 * referencing it, so emit them all now.
408 */
409 for (int i = 0; i < clif->bo_count; i++) {
410 out(clif, "@createbuf_aligned 4096 %s\n", clif->bo[i].name);
411 }
412
413 /* Walk the worklist figuring out the locations of structs based on
414 * the CL contents.
415 */
416 clif_process_worklist(clif);
417
418 /* Dump the contents of the buffers using the relocations we found to
419 * pretty-print structures.
420 */
421 clif_dump_buffers(clif);
422
423 out(clif, "@add_bin 0\n ");
424 out_address(clif, submit->bcl_start);
425 out(clif, "\n ");
426 out_address(clif, submit->bcl_end);
427 out(clif, "\n ");
428 out_address(clif, submit->qma);
429 out(clif, "\n %d\n ", submit->qms);
430 out_address(clif, submit->qts);
431 out(clif, "\n");
432 out(clif, "@wait_bin_all_cores\n");
433
434 out(clif, "@add_render 0\n ");
435 out_address(clif, submit->rcl_start);
436 out(clif, "\n ");
437 out_address(clif, submit->rcl_end);
438 out(clif, "\n ");
439 out_address(clif, submit->qma);
440 out(clif, "\n");
441 out(clif, "@wait_render_all_cores\n");
442 }
443
444 void
clif_dump_add_bo(struct clif_dump * clif,const char * name,uint32_t offset,uint32_t size,void * vaddr)445 clif_dump_add_bo(struct clif_dump *clif, const char *name,
446 uint32_t offset, uint32_t size, void *vaddr)
447 {
448 if (clif->bo_count >= clif->bo_array_size) {
449 clif->bo_array_size = MAX2(4, clif->bo_array_size * 2);
450 clif->bo = reralloc(clif, clif->bo, struct clif_bo,
451 clif->bo_array_size);
452 }
453
454 /* CLIF relocs use the buffer name, so make sure they're unique. */
455 for (int i = 0; i < clif->bo_count; i++)
456 assert(strcmp(clif->bo[i].name, name) != 0);
457
458 clif->bo[clif->bo_count].name = ralloc_strdup(clif, name);
459 clif->bo[clif->bo_count].offset = offset;
460 clif->bo[clif->bo_count].size = size;
461 clif->bo[clif->bo_count].vaddr = vaddr;
462 clif->bo[clif->bo_count].dumped = false;
463 clif->bo_count++;
464 }
465