// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */

#include <linux/ascii85.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"

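/*
 * A captured hardware block: @handle points at the static descriptor that was
 * dumped (register list, cluster, debugbus block, ...) and @data holds the
 * values read back from the hardware.
 */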
struct a6xx_gpu_state_obj {
	const void *handle;
	u32 *data;
};

struct a6xx_gpu_state {
	struct msm_gpu_state base;

	struct a6xx_gpu_state_obj *gmu_registers;
	int nr_gmu_registers;

	struct a6xx_gpu_state_obj *registers;
	int nr_registers;

	struct a6xx_gpu_state_obj *shaders;
	int nr_shaders;

	struct a6xx_gpu_state_obj *clusters;
	int nr_clusters;

	struct a6xx_gpu_state_obj *dbgahb_clusters;
	int nr_dbgahb_clusters;

	struct a6xx_gpu_state_obj *indexed_regs;
	int nr_indexed_regs;

	struct a6xx_gpu_state_obj *debugbus;
	int nr_debugbus;

	struct a6xx_gpu_state_obj *vbif_debugbus;

	struct a6xx_gpu_state_obj *cx_debugbus;
	int nr_cx_debugbus;

	struct msm_gpu_state_bo *gmu_log;
	struct msm_gpu_state_bo *gmu_hfi;
	struct msm_gpu_state_bo *gmu_debug;

	s32 hfi_queue_history[2][HFI_HISTORY_SZ];

	struct list_head objs;

	bool gpu_initialized;
};

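/*
 * Helpers for building a crashdumper script. Each command is a pair of 64-bit
 * words: the first carries the value to write (or the target iova to read
 * into) and the second packs the register offset and the dword count/flags.
 * Each helper returns the number of qwords it emitted so the caller can
 * advance the script pointer; a pair of zero qwords terminates the script.
 */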
static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
	in[0] = val;
	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

	return 2;
}

static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
	in[0] = target;
	in[1] = (((u64) reg) << 44 | dwords);

	return 2;
}

static inline int CRASHDUMP_FINI(u64 *in)
{
	in[0] = 0;
	in[1] = 0;

	return 2;
}

struct a6xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a6xx_state_memobj {
	struct list_head node;
	unsigned long long data[];
};

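/*
 * Every snapshot allocation is wrapped in an a6xx_state_memobj and linked on
 * a6xx_state->objs, so a6xx_gpu_state_destroy() can free the whole capture in
 * a single list walk.
 */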
static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
{
	struct a6xx_state_memobj *obj =
		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

	if (!obj)
		return NULL;

	list_add_tail(&obj->node, &a6xx_state->objs);
	return &obj->data;
}

static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
		size_t size)
{
	void *dst = state_kcalloc(a6xx_state, 1, size);

	if (dst)
		memcpy(dst, src, size);
	return dst;
}

/*
 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 * the rest for the data
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE (SZ_1M - 8192)

static int a6xx_crashdumper_init(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new(gpu->dev,
		SZ_1M, MSM_BO_WC, gpu->aspace,
		&dumper->bo, &dumper->iova);

	if (!IS_ERR(dumper->ptr))
		msm_gem_object_set_name(dumper->bo, "crashdump");

	return PTR_ERR_OR_ZERO(dumper->ptr);
}

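/*
 * Point the CP at the script, kick off the crashdumper and poll for the done
 * bit. The caller must make sure the script fits in the scratch BO and is
 * terminated with CRASHDUMP_FINI().
 */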
static int a6xx_crashdumper_run(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;
	int ret;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
		return -EINVAL;

	/* Make sure all pending memory writes are posted */
	wmb();

	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
		val & 0x02, 100, 10000);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

	return ret;
}

/* read a value from the GX debug bus */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

#define cxdbg_write(ptr, offset, val) \
	msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
	msm_readl((ptr) + ((offset) << 2))

/* read a value from the CX debug bus */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

/* Read a chunk of data from the VBIF debug bus */
static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
		u32 reg, int count, u32 *data)
{
	int i;

	gpu_write(gpu, ctrl0, reg);

	for (i = 0; i < count; i++) {
		gpu_write(gpu, ctrl1, i);
		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
	}

	return count;
}

#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

#define VBIF_DEBUGBUS_BLOCK_SIZE \
	((16 * AXI_ARB_BLOCKS) + \
	 (18 * XIN_AXI_BLOCKS) + \
	 (12 * XIN_CORE_BLOCKS))

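/*
 * Snapshot the whole VBIF test bus into one flat array: 16 dwords per AXI
 * arbiter block, 18 per XIN AXI block and 12 per XIN core block, matching
 * VBIF_DEBUGBUS_BLOCK_SIZE above.
 */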
static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_gpu_state_obj *obj)
{
	u32 clk, *ptr;
	int i;

	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
		sizeof(u32));
	if (!obj->data)
		return;

	obj->handle = NULL;

	/* Get the current clock setting */
	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

	/* Force on the bus so we can read it */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

	/* We will read from BUS2 first, so disable BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

	/* Enable the VBIF bus for reading */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

	ptr = obj->data;

	for (i = 0; i < AXI_ARB_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << (i + 16), 16, ptr);

	for (i = 0; i < XIN_AXI_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << i, 18, ptr);

	/* Stop BUS2 so we can turn on BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

	for (i = 0; i < XIN_CORE_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
			1 << i, 12, ptr);

	/* Restore the VBIF clock setting */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}

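/*
 * The GX and CX debug buses return two dwords per index, which is why the
 * capture buffers below are sized in u64s even though the values come back
 * as u32 pairs.
 */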
static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += debugbus_read(gpu, block->id, i, ptr);
}

static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
}

static void a6xx_get_debugbus(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct resource *res;
	void __iomem *cxdbg = NULL;
	int nr_debugbus_blocks;

	/* Set up the GX debug bus */

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

	/* Set up the CX debug bus - it lives elsewhere in the system so do a
	 * temporary ioremap for the registers
	 */
	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
			"cx_dbgc");

	if (res)
		cxdbg = ioremap(res->start, resource_size(res));

	if (cxdbg) {
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
			0x76543210);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
			0xFEDCBA98);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
	}

	nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
			sizeof(*a6xx_state->debugbus));

	if (a6xx_state->debugbus) {
		int i;

		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
			a6xx_get_debugbus_block(gpu,
				a6xx_state,
				&a6xx_debugbus_blocks[i],
				&a6xx_state->debugbus[i]);

		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

		/*
		 * GBIF exposes the same debugbus as the other GPU blocks, so
		 * fall back to the default path when the GPU uses GBIF. GBIF
		 * also uses exactly the same block ID as VBIF.
		 */
		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
			a6xx_get_debugbus_block(gpu, a6xx_state,
				&a6xx_gbif_debugbus_block,
				&a6xx_state->debugbus[i]);

			a6xx_state->nr_debugbus += 1;
		}
	}

	/* Dump the VBIF debugbus on applicable targets */
	if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
		a6xx_state->vbif_debugbus =
			state_kcalloc(a6xx_state, 1,
					sizeof(*a6xx_state->vbif_debugbus));

		if (a6xx_state->vbif_debugbus)
			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
					a6xx_state->vbif_debugbus);
	}

	if (cxdbg) {
		a6xx_state->cx_debugbus =
			state_kcalloc(a6xx_state,
				ARRAY_SIZE(a6xx_cx_debugbus_blocks),
				sizeof(*a6xx_state->cx_debugbus));

		if (a6xx_state->cx_debugbus) {
			int i;

			for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
				a6xx_get_cx_debugbus_block(cxdbg,
					a6xx_state,
					&a6xx_cx_debugbus_blocks[i],
					&a6xx_state->cx_debugbus[i]);

			a6xx_state->nr_cx_debugbus =
				ARRAY_SIZE(a6xx_cx_debugbus_blocks);
		}

		iounmap(cxdbg);
	}
}

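/*
 * Register lists are stored as start/end pairs; RANGE() returns the inclusive
 * number of registers described by pair 'a'.
 */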
#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)

/* Read a data cluster from behind the AHB aperture */
static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_dbgahb_cluster *dbgahb,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(dbgahb->statetype + i * 2) << 8);

		for (j = 0; j < dbgahb->count; j += 2) {
			int count = RANGE(dbgahb->registers, j);
			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
				dbgahb->registers[j] - (dbgahb->base >> 2);

			in += CRASHDUMP_READ(in, offset, count, out);

			out += count * sizeof(u32);

			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = dbgahb;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_dbgahb_clusters),
		sizeof(*a6xx_state->dbgahb_clusters));

	if (!a6xx_state->dbgahb_clusters)
		return;

	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);

	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
			&a6xx_dbgahb_clusters[i],
			&a6xx_state->dbgahb_clusters[i], dumper);
}

/* Read a data cluster from the CP aperture with the crashdumper */
static void a6xx_get_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_cluster *cluster,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	/* Some clusters need a selector register to be programmed too */
	if (cluster->sel_reg)
		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
			(cluster->id << 8) | (i << 4) | i);

		for (j = 0; j < cluster->count; j += 2) {
			int count = RANGE(cluster->registers, j);

			in += CRASHDUMP_READ(in, cluster->registers[j],
				count, out);

			out += count * sizeof(u32);

			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = cluster;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->clusters = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));

	if (!a6xx_state->clusters)
		return;

	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);

	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
			&a6xx_state->clusters[i], dumper);
}

/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
static void a6xx_get_shader_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_shader_block *block,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
	int i;

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(block->type << 8) | i);

		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
			block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
	}

	CRASHDUMP_FINI(in);

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = block;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_shaders(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->shaders = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));

	if (!a6xx_state->shaders)
		return;

	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);

	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
			&a6xx_state->shaders[i], dumper);
}

/* Read registers from behind the HLSQ aperture with the crashdumper */
static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)

{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
			regs->registers[i] - (regs->val0 >> 2);

		in += CRASHDUMP_READ(in, offset, count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

/* Read a block of registers using the crashdumper */
static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)

{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	/* Some blocks might need to program a selector register first */
	if (regs->val0)
		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);

		in += CRASHDUMP_READ(in, regs->registers[i], count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

/* Read a block of registers via AHB */
static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj)
{
	int i, regcount = 0, index = 0;

	for (i = 0; i < regs->count; i += 2)
		regcount += RANGE(regs->registers, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		int j;

		for (j = 0; j < count; j++)
			obj->data[index++] = gpu_read(gpu,
				regs->registers[i] + j);
	}
}

/* Read a block of GMU registers */
static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		bool rscc)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int i, regcount = 0, index = 0;

	for (i = 0; i < regs->count; i += 2)
		regcount += RANGE(regs->registers, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		int j;

		for (j = 0; j < count; j++) {
			u32 offset = regs->registers[i] + j;
			u32 val;

			if (rscc)
				val = gmu_read_rscc(gmu, offset);
			else
				val = gmu_read(gmu, offset);

			obj->data[index++] = val;
		}
	}
}

static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
		3, sizeof(*a6xx_state->gmu_registers));

	if (!a6xx_state->gmu_registers)
		return;

	a6xx_state->nr_gmu_registers = 3;

	/* Get the CX GMU registers from AHB */
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
		&a6xx_state->gmu_registers[0], false);
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
		&a6xx_state->gmu_registers[1], true);

	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return;

	/* Set the fence to ALLOW mode so we can access the registers */
	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
		&a6xx_state->gmu_registers[2], false);
}

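/* Copy a GMU-owned buffer (log, HFI queues, debug region) into the snapshot */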
static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
{
	struct msm_gpu_state_bo *snapshot;

	if (!bo->size)
		return NULL;

	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
	if (!snapshot)
		return NULL;

	snapshot->iova = bo->iova;
	snapshot->size = bo->size;
	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
	if (!snapshot->data)
		return NULL;

	memcpy(snapshot->data, bo->virt, bo->size);

	return snapshot;
}

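/*
 * Record the most recent HFI_HISTORY_SZ history entries for each HFI queue,
 * rotated so that the oldest entry comes first.
 */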
static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	unsigned i, j;

	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));

	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
		struct a6xx_hfi_queue *queue = &gmu->queues[i];
		for (j = 0; j < HFI_HISTORY_SZ; j++) {
			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
		}
	}
}

#define A6XX_GBIF_REGLIST_SIZE 1
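/*
 * Capture the main GPU register space. The AHB-only lists are always read by
 * the CPU; the remaining lists go through the crashdumper when one is
 * available. A NULL dumper means the SMMU is stalled or the GPU is not
 * initialized, so everything falls back to CPU reads and the HLSQ lists are
 * skipped.
 */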
static void a6xx_get_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
		ARRAY_SIZE(a6xx_reglist) +
		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
	int index = 0;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	a6xx_state->registers = state_kcalloc(a6xx_state,
		count, sizeof(*a6xx_state->registers));

	if (!a6xx_state->registers)
		return;

	a6xx_state->nr_registers = count;

	for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_ahb_reglist[i],
			&a6xx_state->registers[index++]);

	if (a6xx_has_gbif(adreno_gpu))
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_gbif_reglist,
			&a6xx_state->registers[index++]);
	else
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_vbif_reglist,
			&a6xx_state->registers[index++]);
	if (!dumper) {
		/*
		 * We can't use the crashdumper when the SMMU is stalled,
		 * because the GPU has no memory access until we resume
		 * translation (but we don't want to do that until after
		 * we have captured as much useful GPU state as possible).
		 * So instead collect registers via the CPU:
		 */
		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
			a6xx_get_ahb_gpu_registers(gpu,
				a6xx_state, &a6xx_reglist[i],
				&a6xx_state->registers[index++]);
		return;
	}

	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
		a6xx_get_crashdumper_registers(gpu,
			a6xx_state, &a6xx_reglist[i],
			&a6xx_state->registers[index++],
			dumper);

	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
		a6xx_get_crashdumper_hlsq_registers(gpu,
			a6xx_state, &a6xx_hlsq_reglist[i],
			&a6xx_state->registers[index++],
			dumper);
}

/* Read a block of data from an indexed register pair */
static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_indexed_registers *indexed,
		struct a6xx_gpu_state_obj *obj)
{
	int i;

	obj->handle = (const void *) indexed;
	obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
	if (!obj->data)
		return;

	/* All the indexed banks start at address 0 */
	gpu_write(gpu, indexed->addr, 0);

	/* Read the data - each read increments the internal address by 1 */
	for (i = 0; i < indexed->count; i++)
		obj->data[i] = gpu_read(gpu, indexed->data);
}

static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	u32 mempool_size;
	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
	int i;

	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
		sizeof(*a6xx_state->indexed_regs));
	if (!a6xx_state->indexed_regs)
		return;

	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
			&a6xx_state->indexed_regs[i]);

	/* Set the CP mempool size to 0 to stabilize it while dumping */
	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);

	/* Get the contents of the CP mempool */
	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
		&a6xx_state->indexed_regs[i]);

	/*
	 * Offset 0x2000 in the mempool is the size - copy the saved size over
	 * so the data is consistent
	 */
	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;

	/* Restore the size in the hardware */
	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);

	a6xx_state->nr_indexed_regs = count;
}

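/*
 * Top-level snapshot entry point: grab the GMU state first (it is reachable
 * even when GX is off), then the indexed registers, and finally the
 * crashdumper-assisted dumps when it is safe to run the crashdumper.
 */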
struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
		GFP_KERNEL);
	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);

	if (!a6xx_state)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&a6xx_state->objs);

	/* Get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &a6xx_state->base);

	a6xx_get_gmu_registers(gpu, a6xx_state);

	a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
	a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
	a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);

	a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);

	/* If GX isn't on the rest of the data isn't going to be accessible */
	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return &a6xx_state->base;

	/* Get the banks of indexed registers */
	a6xx_get_indexed_registers(gpu, a6xx_state);

	/*
	 * Try to initialize the crashdumper, if we are not dumping state
	 * with the SMMU stalled. The crashdumper needs memory access to
	 * write out GPU state, so we need to skip this when the SMMU is
	 * stalled in response to an iova fault
	 */
	if (!stalled && !gpu->needs_hw_init &&
			!a6xx_crashdumper_init(gpu, &_dumper)) {
		dumper = &_dumper;
	}

	a6xx_get_registers(gpu, a6xx_state, dumper);

	if (dumper) {
		a6xx_get_shaders(gpu, a6xx_state, dumper);
		a6xx_get_clusters(gpu, a6xx_state, dumper);
		a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);

		msm_gem_kernel_put(dumper->bo, gpu->aspace);
	}

	if (snapshot_debugbus)
		a6xx_get_debugbus(gpu, a6xx_state);

	a6xx_state->gpu_initialized = !gpu->needs_hw_init;

	return &a6xx_state->base;
}

static void a6xx_gpu_state_destroy(struct kref *kref)
{
	struct a6xx_state_memobj *obj, *tmp;
	struct msm_gpu_state *state = container_of(kref,
			struct msm_gpu_state, ref);
	struct a6xx_gpu_state *a6xx_state = container_of(state,
			struct a6xx_gpu_state, base);

	if (a6xx_state->gmu_log)
		kvfree(a6xx_state->gmu_log->data);

	if (a6xx_state->gmu_hfi)
		kvfree(a6xx_state->gmu_hfi->data);

	if (a6xx_state->gmu_debug)
		kvfree(a6xx_state->gmu_debug->data);

	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
		list_del(&obj->node);
		kvfree(obj);
	}

	adreno_gpu_state_destroy(state);
	kfree(a6xx_state);
}

int a6xx_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a6xx_gpu_state_destroy);
}

static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
		struct drm_printer *p)
{
	int i, index = 0;

	if (!data)
		return;

	for (i = 0; i < count; i += 2) {
		u32 count = RANGE(registers, i);
		u32 offset = registers[i];
		int j;

		for (j = 0; j < count; index++, offset++, j++) {
			if (data[index] == 0xdeafbead)
				continue;

			drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
				offset << 2, data[index]);
		}
	}
}

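/*
 * ascii85-encode a buffer for the crash dump, trimming trailing zeroes so
 * that an all-zero buffer emits no data at all.
 */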
static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
{
	char out[ASCII85_BUFSZ];
	long i, l, datalen = 0;

	for (i = 0; i < len >> 2; i++) {
		if (data[i])
			datalen = (i + 1) << 2;
	}

	if (datalen == 0)
		return;

	drm_puts(p, " data: !!ascii85 |\n");
	drm_puts(p, " ");

	l = ascii85_encode_len(datalen);

	for (i = 0; i < l; i++)
		drm_puts(p, ascii85_encode(data[i], out));

	drm_puts(p, "\n");
}

static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	drm_puts(p, fmt);
	drm_puts(p, name);
	drm_puts(p, "\n");
}

static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_shader_block *block = obj->handle;
	int i;

	if (!obj->handle)
		return;

	print_name(p, " - type: ", block->name);

	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
		drm_printf(p, " - bank: %d\n", i);
		drm_printf(p, " size: %d\n", block->size);

		if (!obj->data)
			continue;

		print_ascii85(p, block->size << 2,
			obj->data + (block->size * i));
	}
}

static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
		struct drm_printer *p)
{
	int ctx, index = 0;

	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
		int j;

		drm_printf(p, " - context: %d\n", ctx);

		for (j = 0; j < size; j += 2) {
			u32 count = RANGE(registers, j);
			u32 offset = registers[j];
			int k;

			for (k = 0; k < count; index++, offset++, k++) {
				if (data[index] == 0xdeafbead)
					continue;

				drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
					offset << 2, data[index]);
			}
		}
	}
}

static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;

	if (dbgahb) {
		print_name(p, " - cluster-name: ", dbgahb->name);
		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
			obj->data, p);
	}
}

static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_cluster *cluster = obj->handle;

	if (cluster) {
		print_name(p, " - cluster-name: ", cluster->name);
		a6xx_show_cluster_data(cluster->registers, cluster->count,
			obj->data, p);
	}
}

static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_indexed_registers *indexed = obj->handle;

	if (!indexed)
		return;

	print_name(p, " - regs-name: ", indexed->name);
	drm_printf(p, " dwords: %d\n", indexed->count);

	print_ascii85(p, indexed->count << 2, obj->data);
}

static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
		u32 *data, struct drm_printer *p)
{
	if (block) {
		print_name(p, " - debugbus-block: ", block->name);

		/*
		 * count for regular debugbus data is in quadwords,
		 * but print the size in dwords for consistency
		 */
		drm_printf(p, " count: %d\n", block->count << 1);

		print_ascii85(p, block->count << 3, data);
	}
}

static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
		struct drm_printer *p)
{
	int i;

	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}

	if (a6xx_state->vbif_debugbus) {
		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;

		drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n");
		drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);

		/* vbif debugbus data is in dwords. Confusing, huh? */
		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
	}

	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}
}

void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	struct a6xx_gpu_state *a6xx_state = container_of(state,
			struct a6xx_gpu_state, base);
	int i;

	if (IS_ERR_OR_NULL(state))
		return;

	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);

	adreno_show(gpu, state, p);

	drm_puts(p, "gmu-log:\n");
	if (a6xx_state->gmu_log) {
		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;

		drm_printf(p, " iova: 0x%016llx\n", gmu_log->iova);
		drm_printf(p, " size: %zu\n", gmu_log->size);
		adreno_show_object(p, &gmu_log->data, gmu_log->size,
			&gmu_log->encoded);
	}

	drm_puts(p, "gmu-hfi:\n");
	if (a6xx_state->gmu_hfi) {
		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
		unsigned i, j;

		drm_printf(p, " iova: 0x%016llx\n", gmu_hfi->iova);
		drm_printf(p, " size: %zu\n", gmu_hfi->size);
		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
			drm_printf(p, " queue-history[%u]:", i);
			for (j = 0; j < HFI_HISTORY_SZ; j++) {
				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
			}
			drm_printf(p, "\n");
		}
		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
			&gmu_hfi->encoded);
	}

	drm_puts(p, "gmu-debug:\n");
	if (a6xx_state->gmu_debug) {
		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;

		drm_printf(p, " iova: 0x%016llx\n", gmu_debug->iova);
		drm_printf(p, " size: %zu\n", gmu_debug->size);
		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
			&gmu_debug->encoded);
	}

	drm_puts(p, "registers:\n");
	for (i = 0; i < a6xx_state->nr_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	drm_puts(p, "registers-gmu:\n");
	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	drm_puts(p, "indexed-registers:\n");
	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

	drm_puts(p, "shader-blocks:\n");
	for (i = 0; i < a6xx_state->nr_shaders; i++)
		a6xx_show_shader(&a6xx_state->shaders[i], p);

	drm_puts(p, "clusters:\n");
	for (i = 0; i < a6xx_state->nr_clusters; i++)
		a6xx_show_cluster(&a6xx_state->clusters[i], p);

	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
		a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);

	drm_puts(p, "debugbus:\n");
	a6xx_show_debugbus(a6xx_state, p);
}