1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3 */
4
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18
19 #define GPU_PAS_ID 13
20
21 void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
22 bool sync)
23 {
24 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
25 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
26 uint32_t wptr;
27 unsigned long flags;
28
29 /*
30 * Most flush operations need to issue a WHERE_AM_I opcode to sync up
31 * the rptr shadow
32 */
33 if (a5xx_gpu->has_whereami && sync) {
34 OUT_PKT7(ring, CP_WHERE_AM_I, 2);
35 OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
36 OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
37 }
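/*
 * CP_WHERE_AM_I takes the 64-bit GPU address of this ring's shadow slot
 * (shadowptr(a5xx_gpu, ring)); the CP is expected to write its current read
 * pointer there, which is what a5xx_get_rptr() later reads instead of
 * polling REG_A5XX_CP_RB_RPTR.
 */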
38
39 spin_lock_irqsave(&ring->preempt_lock, flags);
40
41 /* Copy the shadow to the actual register */
42 ring->cur = ring->next;
43
44 /* Make sure to wrap wptr if we need to */
45 wptr = get_wptr(ring);
46
47 spin_unlock_irqrestore(&ring->preempt_lock, flags);
48
49 /* Make sure everything is posted before making a decision */
50 mb();
51
52 /* Update HW if this is the current ring and we are not in preempt */
53 if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
54 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
55 }
56
57 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
58 {
59 struct msm_drm_private *priv = gpu->dev->dev_private;
60 struct msm_ringbuffer *ring = submit->ring;
61 struct msm_gem_object *obj;
62 uint32_t *ptr, dwords;
63 unsigned int i, j;
64
65 for (i = 0; i < submit->nr_cmds; i++) {
66 switch (submit->cmd[i].type) {
67 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
68 break;
69 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
70 if (priv->lastctx == submit->queue->ctx)
71 break;
72 fallthrough;
73 case MSM_SUBMIT_CMD_BUF:
74 /* copy commands into RB: */
75 obj = submit->bos[submit->cmd[i].idx].obj;
76 dwords = submit->cmd[i].size;
77
78 ptr = msm_gem_get_vaddr(&obj->base);
79
80 /* _get_vaddr() shouldn't fail at this point,
81 * since we've already mapped it once in
82 * submit_reloc()
83 */
84 if (WARN_ON(IS_ERR_OR_NULL(ptr)))
85 return;
86
87 for (j = 0; j < dwords; j++) {
88 /* normally the OUT_PKTn() would wait
89 * for space for the packet. But since
90 * we just OUT_RING() the whole thing,
91 * we need to call adreno_wait_ring()
92 * ourselves:
93 */
94 adreno_wait_ring(ring, 1);
95 OUT_RING(ring, ptr[j]);
96 }
97
98 msm_gem_put_vaddr(&obj->base);
99
100 break;
101 }
102 }
103
104 a5xx_flush(gpu, ring, true);
105 a5xx_preempt_trigger(gpu);
106
107 /* we might not necessarily have a cmd from userspace to
108 * trigger an event to know that submit has completed, so
109 * do this manually:
110 */
111 a5xx_idle(gpu, ring);
112 ring->memptrs->fence = submit->seqno;
113 msm_gpu_retire(gpu);
114 }
115
116 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
117 {
118 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
119 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
120 struct msm_drm_private *priv = gpu->dev->dev_private;
121 struct msm_ringbuffer *ring = submit->ring;
122 unsigned int i, ibs = 0;
123
124 if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
125 priv->lastctx = NULL;
126 a5xx_submit_in_rb(gpu, submit);
127 return;
128 }
129
130 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
131 OUT_RING(ring, 0x02);
132
133 /* Turn off protected mode to write to special registers */
134 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
135 OUT_RING(ring, 0);
136
137 /* Set the save preemption record for the ring/command */
138 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
139 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
140 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
141
142 /* Turn back on protected mode */
143 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
144 OUT_RING(ring, 1);
145
146 /* Enable local preemption for finegrain preemption */
147 OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
148 OUT_RING(ring, 0x1);
149
150 /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
151 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
152 OUT_RING(ring, 0x02);
153
154 /* Submit the commands */
155 for (i = 0; i < submit->nr_cmds; i++) {
156 switch (submit->cmd[i].type) {
157 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
158 break;
159 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
160 if (priv->lastctx == submit->queue->ctx)
161 break;
162 fallthrough;
163 case MSM_SUBMIT_CMD_BUF:
164 OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
165 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
166 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
167 OUT_RING(ring, submit->cmd[i].size);
168 ibs++;
169 break;
170 }
171 }
172
173 /*
174 * Write the render mode to NULL (0) to indicate to the CP that the IBs
175 * are done rendering - otherwise a lucky preemption would start
176 * replaying from the last checkpoint
177 */
178 OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
179 OUT_RING(ring, 0);
180 OUT_RING(ring, 0);
181 OUT_RING(ring, 0);
182 OUT_RING(ring, 0);
183 OUT_RING(ring, 0);
184
185 /* Turn off IB level preemptions */
186 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
187 OUT_RING(ring, 0x01);
188
189 /* Write the fence to the scratch register */
190 OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
191 OUT_RING(ring, submit->seqno);
192
193 /*
194 * Execute a CACHE_FLUSH_TS event. This will ensure that the
195 * timestamp is written to the memory and then triggers the interrupt
196 */
197 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
198 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
199 CP_EVENT_WRITE_0_IRQ);
200 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
201 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
202 OUT_RING(ring, submit->seqno);
203
204 /* Yield the floor on command completion */
205 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
206 /*
207 * If dword[2:1] are non zero, they specify an address for the CP to
208 * write the value of dword[3] to on preemption complete. Write 0 to
209 * skip the write
210 */
211 OUT_RING(ring, 0x00);
212 OUT_RING(ring, 0x00);
213 /* Data value - not used if the address above is 0 */
214 OUT_RING(ring, 0x01);
215 /* Set bit 0 to trigger an interrupt on preempt complete */
216 OUT_RING(ring, 0x01);
217
218 /* A WHERE_AM_I packet is not needed after a YIELD */
219 a5xx_flush(gpu, ring, false);
220
221 /* Check to see if we need to start preemption */
222 a5xx_preempt_trigger(gpu);
223 }
224
225 static const struct {
226 u32 offset;
227 u32 value;
228 } a5xx_hwcg[] = {
229 {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
230 {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
231 {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
232 {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
233 {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
234 {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
235 {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
236 {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
237 {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
238 {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
239 {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
240 {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
241 {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
242 {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
243 {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
244 {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
245 {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
246 {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
247 {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
248 {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
249 {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
250 {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
251 {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
252 {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
253 {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
254 {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
255 {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
256 {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
257 {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
258 {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
259 {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
260 {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
261 {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
262 {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
263 {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
264 {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
265 {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
266 {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
267 {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
268 {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
269 {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
270 {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
271 {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
272 {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
273 {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
274 {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
275 {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
276 {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
277 {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
278 {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
279 {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
280 {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
281 {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
282 {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
283 {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
284 {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
285 {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
286 {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
287 {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
288 {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
289 {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
290 {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
291 {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
292 {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
293 {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
294 {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
295 {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
296 {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
297 {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
298 {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
299 {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
300 {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
301 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
302 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
303 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
304 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
305 {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
306 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
307 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
308 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
309 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
310 {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
311 {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
312 {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
313 {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
314 {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
315 {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
316 {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
317 {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
318 {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
319 {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
320 {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
321 };
322
323 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
324 {
325 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
326 unsigned int i;
327
328 for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
329 gpu_write(gpu, a5xx_hwcg[i].offset,
330 state ? a5xx_hwcg[i].value : 0);
331
332 if (adreno_is_a540(adreno_gpu)) {
333 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
334 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
335 }
336
337 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
338 gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
339 }
340
341 static int a5xx_me_init(struct msm_gpu *gpu)
342 {
343 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
344 struct msm_ringbuffer *ring = gpu->rb[0];
345
346 OUT_PKT7(ring, CP_ME_INIT, 8);
347
348 OUT_RING(ring, 0x0000002F);
349
350 /* Enable multiple hardware contexts */
351 OUT_RING(ring, 0x00000003);
352
353 /* Enable error detection */
354 OUT_RING(ring, 0x20000000);
355
356 /* Don't enable header dump */
357 OUT_RING(ring, 0x00000000);
358 OUT_RING(ring, 0x00000000);
359
360 /* Specify workarounds for various microcode issues */
361 if (adreno_is_a530(adreno_gpu)) {
362 /* Workaround for token end syncs
363 * Force a WFI after every direct-render 3D mode draw and every
364 * 2D mode 3 draw
365 */
366 OUT_RING(ring, 0x0000000B);
367 } else if (adreno_is_a510(adreno_gpu)) {
368 /* Workaround for token and syncs */
369 OUT_RING(ring, 0x00000001);
370 } else {
371 /* No workarounds enabled */
372 OUT_RING(ring, 0x00000000);
373 }
374
375 OUT_RING(ring, 0x00000000);
376 OUT_RING(ring, 0x00000000);
377
378 a5xx_flush(gpu, ring, true);
379 return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
380 }
381
382 static int a5xx_preempt_start(struct msm_gpu *gpu)
383 {
384 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
385 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
386 struct msm_ringbuffer *ring = gpu->rb[0];
387
388 if (gpu->nr_rings == 1)
389 return 0;
390
391 /* Turn off protected mode to write to special registers */
392 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
393 OUT_RING(ring, 0);
394
395 /* Set the save preemption record for the ring/command */
396 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
397 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
398 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
399
400 /* Turn back on protected mode */
401 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
402 OUT_RING(ring, 1);
403
404 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
405 OUT_RING(ring, 0x00);
406
407 OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
408 OUT_RING(ring, 0x01);
409
410 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
411 OUT_RING(ring, 0x01);
412
413 /* Yield the floor on command completion */
414 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
415 OUT_RING(ring, 0x00);
416 OUT_RING(ring, 0x00);
417 OUT_RING(ring, 0x01);
418 OUT_RING(ring, 0x01);
419
420 /* The WHERE_AM_I packet is not needed after a YIELD is issued */
421 a5xx_flush(gpu, ring, false);
422
423 return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
424 }
425
426 static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
427 struct drm_gem_object *obj)
428 {
429 u32 *buf = msm_gem_get_vaddr_active(obj);
430
431 if (IS_ERR(buf))
432 return;
433
434 /*
435 * If the lowest nibble is 0xa that is an indication that this microcode
436 * has been patched. The actual version is in dword [3] but we only care
437 * about the patchlevel which is the lowest nibble of dword [3]
438 */
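/*
 * For example (hypothetical firmware words): buf[0] = 0x0034000a has a low
 * nibble of 0xa, so the microcode is considered patched, and buf[2] =
 * 0x00000021 has a low nibble of 1, so the check below passes and
 * WHERE_AM_I support is assumed.
 */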
439 if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
440 a5xx_gpu->has_whereami = true;
441
442 msm_gem_put_vaddr(obj);
443 }
444
445 static int a5xx_ucode_init(struct msm_gpu *gpu)
446 {
447 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
448 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
449 int ret;
450
451 if (!a5xx_gpu->pm4_bo) {
452 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
453 adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
454
456 if (IS_ERR(a5xx_gpu->pm4_bo)) {
457 ret = PTR_ERR(a5xx_gpu->pm4_bo);
458 a5xx_gpu->pm4_bo = NULL;
459 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
460 ret);
461 return ret;
462 }
463
464 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
465 }
466
467 if (!a5xx_gpu->pfp_bo) {
468 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
469 adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
470
471 if (IS_ERR(a5xx_gpu->pfp_bo)) {
472 ret = PTR_ERR(a5xx_gpu->pfp_bo);
473 a5xx_gpu->pfp_bo = NULL;
474 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
475 ret);
476 return ret;
477 }
478
479 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
480 a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
481 }
482
483 gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
484 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
485
486 gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
487 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
488
489 return 0;
490 }
491
492 #define SCM_GPU_ZAP_SHADER_RESUME 0
493
494 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
495 {
496 int ret;
497
498 ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
499 if (ret)
500 DRM_ERROR("%s: zap-shader resume failed: %d\n",
501 gpu->name, ret);
502
503 return ret;
504 }
505
506 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
507 {
508 static bool loaded;
509 int ret;
510
511 /*
512 * If the zap shader is already loaded into memory we just need to kick
513 * the remote processor to reinitialize it
514 */
515 if (loaded)
516 return a5xx_zap_shader_resume(gpu);
517
518 ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
519
520 loaded = !ret;
521 return ret;
522 }
523
524 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
525 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
526 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
527 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
528 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
529 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
530 A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
531 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
532 A5XX_RBBM_INT_0_MASK_CP_SW | \
533 A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
534 A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
535 A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
536
537 static int a5xx_hw_init(struct msm_gpu *gpu)
538 {
539 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
540 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
541 int ret;
542
543 gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
544
545 if (adreno_is_a540(adreno_gpu))
546 gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
547
548 /* Make all blocks contribute to the GPU BUSY perf counter */
549 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
550
551 /* Enable RBBM error reporting bits */
552 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
553
554 if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
555 /*
556 * Mask out the activity signals from RB1-3 to avoid false
557 * positives
558 */
559
560 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
561 0xF0000000);
562 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
563 0xFFFFFFFF);
564 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
565 0xFFFFFFFF);
566 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
567 0xFFFFFFFF);
568 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
569 0xFFFFFFFF);
570 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
571 0xFFFFFFFF);
572 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
573 0xFFFFFFFF);
574 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
575 0xFFFFFFFF);
576 }
577
578 /* Enable fault detection */
579 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
580 (1 << 30) | 0xFFFF);
581
582 /* Turn on performance counters */
583 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
584
585 /* Select CP0 to always count cycles */
586 gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
587
588 /* Select RBBM0 to countable 6 to get the busy status for devfreq */
589 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
590
591 /* Increase VFD cache access so LRZ and other data gets evicted less */
592 gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
593
594 /* Disable L2 bypass in the UCHE */
595 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
596 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
597 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
598 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
599
600 /* Set the GMEM VA range (0 to gpu->gmem) */
601 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
602 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
603 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
604 0x00100000 + adreno_gpu->gmem - 1);
605 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
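/*
 * For example, with 1 MB of GMEM (adreno_gpu->gmem == 0x100000) this maps
 * the range 0x00100000..0x001FFFFF, i.e. MIN + gmem - 1.
 */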
606
607 if (adreno_is_a510(adreno_gpu)) {
608 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
609 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
610 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
611 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
612 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
613 (0x200 << 11 | 0x200 << 22));
614 } else {
615 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
616 if (adreno_is_a530(adreno_gpu))
617 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
618 if (adreno_is_a540(adreno_gpu))
619 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
620 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
621 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
622 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
623 (0x400 << 11 | 0x300 << 22));
624 }
625
626 if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
627 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
628
629 /* Enable USE_RETENTION_FLOPS */
630 gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
631
632 /* Enable ME/PFP split notification */
633 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
634
635 /*
636 * On A5xx, the CCU can send the context_done event for a particular
637 * context to the UCHE, which ultimately reaches the CP even while a
638 * valid transaction for that context is still inside the CCU. This can
639 * let the CP program config registers, which makes the "valid
640 * transaction" inside the CCU be interpreted differently and can cause
641 * a GPU fault. This bug is fixed in the latest A510 revision. To enable
642 * the fix, bit[11] of RB_DBG_ECO_CNTL needs to be set to 0 (the default
643 * is 1, i.e. disabled). On older A510 revisions this bit is unused.
644 */
645 if (adreno_is_a510(adreno_gpu))
646 gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
647
648 /* Enable HWCG */
649 a5xx_set_hwcg(gpu, true);
650
651 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
652
653 /* Set the highest bank bit */
654 gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
655 gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
656 if (adreno_is_a540(adreno_gpu))
657 gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
658
659 /* Protect registers from the CP */
660 gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
661
662 /* RBBM */
663 gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
664 gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
665 gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
666 gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
667 gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
668 gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
669
670 /* Content protect */
671 gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
672 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
673 16));
674 gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
675 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
676
677 /* CP */
678 gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
679 gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
680 gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
681 gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
682
683 /* RB */
684 gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
685 gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
686
687 /* VPC */
688 gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
689 gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
690
691 /* UCHE */
692 gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
693
694 if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
695 gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
696 ADRENO_PROTECT_RW(0x10000, 0x8000));
697
698 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
699 /*
700 * Disable the trusted memory range - we don't actually support secure
701 * memory rendering at this point in time and we don't want to block off
702 * part of the virtual memory space.
703 */
704 gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
705 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
706 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
707
708 /* Put the GPU into 64 bit by default */
709 gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
710 gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
711 gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
712 gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
713 gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
714 gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
715 gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
716 gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
717 gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
718 gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
719 gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
720 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
721
722 /*
723 * A VPC corner case with local memory load/kill leads to corrupt
724 * internal state. The normal disable does not work for all a5xx chips,
725 * so apply the following setting to disable it.
726 */
727 if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
728 gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
729 gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
730 }
731
732 ret = adreno_hw_init(gpu);
733 if (ret)
734 return ret;
735
736 if (!adreno_is_a510(adreno_gpu))
737 a5xx_gpmu_ucode_init(gpu);
738
739 ret = a5xx_ucode_init(gpu);
740 if (ret)
741 return ret;
742
743 /* Set the ringbuffer address */
744 gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
745 gpu->rb[0]->iova);
746
747 /*
748 * If the microcode supports the WHERE_AM_I opcode then we can use that
749 * in lieu of the RPTR shadow and enable preemption. Otherwise, we
750 * can't safely use the RPTR shadow or preemption. In either case, the
751 * RPTR shadow should be disabled in hardware.
752 */
753 gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
754 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
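/*
 * AXXX_CP_RB_CNTL_NO_UPDATE is assumed to stop the CP from writing RPTR back
 * to memory on its own; with WHERE_AM_I the shadow is only refreshed when we
 * explicitly request it in a5xx_flush().
 */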
755
756 /* Create a privileged buffer for the RPTR shadow */
757 if (a5xx_gpu->has_whereami) {
758 if (!a5xx_gpu->shadow_bo) {
759 a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
760 sizeof(u32) * gpu->nr_rings,
761 MSM_BO_UNCACHED | MSM_BO_MAP_PRIV,
762 gpu->aspace, &a5xx_gpu->shadow_bo,
763 &a5xx_gpu->shadow_iova);
764
765 if (IS_ERR(a5xx_gpu->shadow))
766 return PTR_ERR(a5xx_gpu->shadow);
767 }
768
769 gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
770 REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
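/*
 * shadowptr() is assumed to resolve to shadow_iova + ring->id * sizeof(u32),
 * so each ring gets its own dword in the shadow buffer allocated above.
 */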
771 } else if (gpu->nr_rings > 1) {
772 /* Disable preemption if WHERE_AM_I isn't available */
773 a5xx_preempt_fini(gpu);
774 gpu->nr_rings = 1;
775 }
776
777 a5xx_preempt_hw_init(gpu);
778
779 /* Disable the interrupts through the initial bringup stage */
780 gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
781
782 /* Clear ME_HALT to start the micro engine */
783 gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
784 ret = a5xx_me_init(gpu);
785 if (ret)
786 return ret;
787
788 ret = a5xx_power_init(gpu);
789 if (ret)
790 return ret;
791
792 /*
793 * Send a pipeline event stat to get misbehaving counters to start
794 * ticking correctly
795 */
796 if (adreno_is_a530(adreno_gpu)) {
797 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
798 OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
799
800 a5xx_flush(gpu, gpu->rb[0], true);
801 if (!a5xx_idle(gpu, gpu->rb[0]))
802 return -EINVAL;
803 }
804
805 /*
806 * If the chip we are using supports loading a zap shader, try to load
807 * one into the secure world. If successful we can use the CP to switch
808 * out of secure mode. If not, we have no recourse but to try to switch
809 * ourselves out manually. If we guessed wrong then access to the
810 * RBBM_SECVID_TRUST_CNTL register will be blocked and a permissions
811 * violation will soon follow.
812 */
813 ret = a5xx_zap_shader_init(gpu);
814 if (!ret) {
815 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
816 OUT_RING(gpu->rb[0], 0x00000000);
817
818 a5xx_flush(gpu, gpu->rb[0], true);
819 if (!a5xx_idle(gpu, gpu->rb[0]))
820 return -EINVAL;
821 } else if (ret == -ENODEV) {
822 /*
823 * This device does not use a zap shader (but print a warning
824 * just in case someone got their dt wrong.. hopefully they
825 * have a debug UART to realize the error of their ways...
826 * if you mess this up you are about to crash horribly)
827 */
828 dev_warn_once(gpu->dev->dev,
829 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
830 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
831 } else {
832 return ret;
833 }
834
835 /* Last step - yield the ringbuffer */
836 a5xx_preempt_start(gpu);
837
838 return 0;
839 }
840
841 static void a5xx_recover(struct msm_gpu *gpu)
842 {
843 int i;
844
845 adreno_dump_info(gpu);
846
847 for (i = 0; i < 8; i++) {
848 printk("CP_SCRATCH_REG%d: %u\n", i,
849 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
850 }
851
852 if (hang_debug)
853 a5xx_dump(gpu);
854
855 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
856 gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
857 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
858 adreno_recover(gpu);
859 }
860
861 static void a5xx_destroy(struct msm_gpu *gpu)
862 {
863 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
864 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
865
866 DBG("%s", gpu->name);
867
868 a5xx_preempt_fini(gpu);
869
870 if (a5xx_gpu->pm4_bo) {
871 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
872 drm_gem_object_put(a5xx_gpu->pm4_bo);
873 }
874
875 if (a5xx_gpu->pfp_bo) {
876 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
877 drm_gem_object_put(a5xx_gpu->pfp_bo);
878 }
879
880 if (a5xx_gpu->gpmu_bo) {
881 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
882 drm_gem_object_put(a5xx_gpu->gpmu_bo);
883 }
884
885 if (a5xx_gpu->shadow_bo) {
886 msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
887 drm_gem_object_put(a5xx_gpu->shadow_bo);
888 }
889
890 adreno_gpu_cleanup(adreno_gpu);
891 kfree(a5xx_gpu);
892 }
893
894 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
895 {
896 if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
897 return false;
898
899 /*
900 * Nearly every abnormality ends up pausing the GPU and triggering a
901 * fault so we can safely just watch for this one interrupt to fire
902 */
903 return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
904 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
905 }
906
907 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
908 {
909 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
910 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
911
912 if (ring != a5xx_gpu->cur_ring) {
913 WARN(1, "Tried to idle a non-current ringbuffer\n");
914 return false;
915 }
916
917 /* wait for CP to drain ringbuffer: */
918 if (!adreno_idle(gpu, ring))
919 return false;
920
921 if (spin_until(_a5xx_check_idle(gpu))) {
922 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
923 gpu->name, __builtin_return_address(0),
924 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
925 gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
926 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
927 gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
928 return false;
929 }
930
931 return true;
932 }
933
934 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
935 {
936 struct msm_gpu *gpu = arg;
937 pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
938 iova, flags,
939 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
940 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
941 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
942 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
943
944 return -EFAULT;
945 }
946
947 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
948 {
949 u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
950
951 if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
952 u32 val;
953
954 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
955
956 /*
957 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
958 * read it twice
959 */
960
961 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
962 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
963
964 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
965 val);
966 }
967
968 if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
969 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
970 gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
971
972 if (status & A5XX_CP_INT_CP_DMA_ERROR)
973 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
974
975 if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
976 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
977
978 dev_err_ratelimited(gpu->dev->dev,
979 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
980 val & (1 << 24) ? "WRITE" : "READ",
981 (val & 0xFFFFF) >> 2, val);
982 }
983
984 if (status & A5XX_CP_INT_CP_AHB_ERROR) {
985 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
986 const char *access[16] = { "reserved", "reserved",
987 "timestamp lo", "timestamp hi", "pfp read", "pfp write",
988 "", "", "me read", "me write", "", "", "crashdump read",
989 "crashdump write" };
990
991 dev_err_ratelimited(gpu->dev->dev,
992 "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
993 status & 0xFFFFF, access[(status >> 24) & 0xF],
994 (status & (1 << 31)), status);
995 }
996 }
997
998 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
999 {
1000 if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
1001 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
1002
1003 dev_err_ratelimited(gpu->dev->dev,
1004 "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
1005 val & (1 << 28) ? "WRITE" : "READ",
1006 (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
1007 (val >> 24) & 0xF);
1008
1009 /* Clear the error */
1010 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
1011
1012 /* Clear the interrupt */
1013 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1014 A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1015 }
1016
1017 if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
1018 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
1019
1020 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
1021 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
1022 gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
1023
1024 if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
1025 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
1026 gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
1027
1028 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
1029 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
1030 gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
1031
1032 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1033 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
1034
1035 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1036 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
1037 }
1038
1039 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
1040 {
1041 uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
1042
1043 addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
1044
1045 dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
1046 addr);
1047 }
1048
1049 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
1050 {
1051 dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
1052 }
1053
1054 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
1055 {
1056 struct drm_device *dev = gpu->dev;
1057 struct msm_drm_private *priv = dev->dev_private;
1058 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1059
1060 DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1061 ring ? ring->id : -1, ring ? ring->seqno : 0,
1062 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1063 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1064 gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1065 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1066 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1067 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1068 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1069
1070 /* Turn off the hangcheck timer to keep it from bothering us */
1071 del_timer(&gpu->hangcheck_timer);
1072
1073 queue_work(priv->wq, &gpu->recover_work);
1074 }
1075
1076 #define RBBM_ERROR_MASK \
1077 (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1078 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1079 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1080 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1081 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1082 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1083
1084 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1085 {
1086 u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1087
1088 /*
1089 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1090 * before the source is cleared the interrupt will storm.
1091 */
1092 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1093 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1094
1095 /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1096 if (status & RBBM_ERROR_MASK)
1097 a5xx_rbbm_err_irq(gpu, status);
1098
1099 if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1100 a5xx_cp_err_irq(gpu);
1101
1102 if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1103 a5xx_fault_detect_irq(gpu);
1104
1105 if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1106 a5xx_uche_err_irq(gpu);
1107
1108 if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1109 a5xx_gpmu_err_irq(gpu);
1110
1111 if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1112 a5xx_preempt_trigger(gpu);
1113 msm_gpu_retire(gpu);
1114 }
1115
1116 if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1117 a5xx_preempt_irq(gpu);
1118
1119 return IRQ_HANDLED;
1120 }
1121
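/*
 * Register ranges to capture for dumps/crash state, assumed to be consumed
 * as (start, end) offset pairs and terminated by ~0.
 */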
1122 static const u32 a5xx_registers[] = {
1123 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1124 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1125 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1126 0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1127 0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1128 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1129 0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1130 0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1131 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1132 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1133 0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1134 0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1135 0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1136 0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1137 0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1138 0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1139 0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1140 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1141 0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1142 0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1143 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1144 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1145 0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1146 0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1147 0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1148 0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1149 0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1150 0xAC60, 0xAC60, ~0,
1151 };
1152
1153 static void a5xx_dump(struct msm_gpu *gpu)
1154 {
1155 DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n",
1156 gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1157 adreno_dump(gpu);
1158 }
1159
1160 static int a5xx_pm_resume(struct msm_gpu *gpu)
1161 {
1162 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1163 int ret;
1164
1165 /* Turn on the core power */
1166 ret = msm_gpu_pm_resume(gpu);
1167 if (ret)
1168 return ret;
1169
1170 if (adreno_is_a510(adreno_gpu)) {
1171 /* Halt the sp_input_clk at HM level */
1172 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1173 a5xx_set_hwcg(gpu, true);
1174 /* Turn on sp_input_clk at HM level */
1175 gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1176 return 0;
1177 }
1178
1179 /* Turn on the RBCCU domain first to limit the chances of voltage droop */
1180 gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1181
1182 /* Wait 3 usecs before polling */
1183 udelay(3);
1184
1185 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1186 (1 << 20), (1 << 20));
1187 if (ret) {
1188 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1189 gpu->name,
1190 gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1191 return ret;
1192 }
1193
1194 /* Turn on the SP domain */
1195 gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1196 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1197 (1 << 20), (1 << 20));
1198 if (ret)
1199 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1200 gpu->name);
1201
1202 return ret;
1203 }
1204
1205 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1206 {
1207 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1208 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1209 u32 mask = 0xf;
1210 int i, ret;
1211
1212 /* A510 has 3 XIN ports in VBIF */
1213 if (adreno_is_a510(adreno_gpu))
1214 mask = 0x7;
1215
1216 /* Clear the VBIF pipe before shutting down */
1217 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1218 spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1219 mask) == mask);
1220
1221 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1222
1223 /*
1224 * Reset the VBIF before power collapse to avoid issues with FIFO
1225 * entries
1226 */
1227 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1228 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1229
1230 ret = msm_gpu_pm_suspend(gpu);
1231 if (ret)
1232 return ret;
1233
1234 if (a5xx_gpu->has_whereami)
1235 for (i = 0; i < gpu->nr_rings; i++)
1236 a5xx_gpu->shadow[i] = 0;
1237
1238 return 0;
1239 }
1240
1241 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1242 {
1243 *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
1244 REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
1245
1246 return 0;
1247 }
1248
1249 struct a5xx_crashdumper {
1250 void *ptr;
1251 struct drm_gem_object *bo;
1252 u64 iova;
1253 };
1254
1255 struct a5xx_gpu_state {
1256 struct msm_gpu_state base;
1257 u32 *hlsqregs;
1258 };
1259
1260 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1261 struct a5xx_crashdumper *dumper)
1262 {
1263 dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1264 SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1265 &dumper->bo, &dumper->iova);
1266
1267 if (!IS_ERR(dumper->ptr))
1268 msm_gem_object_set_name(dumper->bo, "crashdump");
1269
1270 return PTR_ERR_OR_ZERO(dumper->ptr);
1271 }
1272
1273 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1274 struct a5xx_crashdumper *dumper)
1275 {
1276 u32 val;
1277
1278 if (IS_ERR_OR_NULL(dumper->ptr))
1279 return -EINVAL;
1280
1281 gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1282 REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1283
1284 gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1285
1286 return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1287 val & 0x04, 100, 10000);
1288 }
1289
1290 /*
1291 * This is a list of the registers that need to be read through the HLSQ
1292 * aperture via the crashdumper. These are not nominally accessible from
1293 * the CPU on a secure platform.
1294 */
1295 static const struct {
1296 u32 type;
1297 u32 regoffset;
1298 u32 count;
1299 } a5xx_hlsq_aperture_regs[] = {
1300 { 0x35, 0xe00, 0x32 }, /* HLSQ non-context */
1301 { 0x31, 0x2080, 0x1 }, /* HLSQ 2D context 0 */
1302 { 0x33, 0x2480, 0x1 }, /* HLSQ 2D context 1 */
1303 { 0x32, 0xe780, 0x62 }, /* HLSQ 3D context 0 */
1304 { 0x34, 0xef80, 0x62 }, /* HLSQ 3D context 1 */
1305 { 0x3f, 0x0ec0, 0x40 }, /* SP non-context */
1306 { 0x3d, 0x2040, 0x1 }, /* SP 2D context 0 */
1307 { 0x3b, 0x2440, 0x1 }, /* SP 2D context 1 */
1308 { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1309 { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1310 { 0x3a, 0x0f00, 0x1c }, /* TP non-context */
1311 { 0x38, 0x2000, 0xa }, /* TP 2D context 0 */
1312 { 0x36, 0x2400, 0xa }, /* TP 2D context 1 */
1313 { 0x39, 0xe700, 0x80 }, /* TP 3D context 0 */
1314 { 0x37, 0xef00, 0x80 }, /* TP 3D context 1 */
1315 };
1316
1317 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1318 struct a5xx_gpu_state *a5xx_state)
1319 {
1320 struct a5xx_crashdumper dumper = { 0 };
1321 u32 offset, count = 0;
1322 u64 *ptr;
1323 int i;
1324
1325 if (a5xx_crashdumper_init(gpu, &dumper))
1326 return;
1327
1328 /* The script will be written at offset 0 */
1329 ptr = dumper.ptr;
1330
1331 /* Start writing the data at offset 256k */
1332 offset = dumper.iova + (256 * SZ_1K);
1333
1334 /* Count how many additional registers to get from the HLSQ aperture */
1335 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1336 count += a5xx_hlsq_aperture_regs[i].count;
1337
1338 a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1339 if (!a5xx_state->hlsqregs)
1340 return;
1341
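/*
 * Each script entry below is assumed to be a pair of 64-bit words: the first
 * is a value to write (or a target IOVA to dump into) and the second packs
 * the register offset in bits [63:44], a write flag in bit 21 and a dword
 * count in the low bits. Two zero qwords terminate the script.
 */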
1342 /* Build the crashdump script */
1343 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1344 u32 type = a5xx_hlsq_aperture_regs[i].type;
1345 u32 c = a5xx_hlsq_aperture_regs[i].count;
1346
1347 /* Write the register to select the desired bank */
1348 *ptr++ = ((u64) type << 8);
1349 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1350 (1 << 21) | 1;
1351
1352 *ptr++ = offset;
1353 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1354 | c;
1355
1356 offset += c * sizeof(u32);
1357 }
1358
1359 /* Write two zeros to close off the script */
1360 *ptr++ = 0;
1361 *ptr++ = 0;
1362
1363 if (a5xx_crashdumper_run(gpu, &dumper)) {
1364 kfree(a5xx_state->hlsqregs);
1365 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1366 return;
1367 }
1368
1369 /* Copy the data from the crashdumper to the state */
1370 memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1371 count * sizeof(u32));
1372
1373 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1374 }
1375
1376 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1377 {
1378 struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1379 GFP_KERNEL);
1380
1381 if (!a5xx_state)
1382 return ERR_PTR(-ENOMEM);
1383
1384 /* Temporarily disable hardware clock gating before reading the hw */
1385 a5xx_set_hwcg(gpu, false);
1386
1387 /* First get the generic state from the adreno core */
1388 adreno_gpu_state_get(gpu, &(a5xx_state->base));
1389
1390 a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1391
1392 /* Get the HLSQ regs with the help of the crashdumper */
1393 a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1394
1395 a5xx_set_hwcg(gpu, true);
1396
1397 return &a5xx_state->base;
1398 }
1399
1400 static void a5xx_gpu_state_destroy(struct kref *kref)
1401 {
1402 struct msm_gpu_state *state = container_of(kref,
1403 struct msm_gpu_state, ref);
1404 struct a5xx_gpu_state *a5xx_state = container_of(state,
1405 struct a5xx_gpu_state, base);
1406
1407 kfree(a5xx_state->hlsqregs);
1408
1409 adreno_gpu_state_destroy(state);
1410 kfree(a5xx_state);
1411 }
1412
1413 static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1414 {
1415 if (IS_ERR_OR_NULL(state))
1416 return 1;
1417
1418 return kref_put(&state->ref, a5xx_gpu_state_destroy);
1419 }
1420
1421
1422 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1423 static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1424 struct drm_printer *p)
1425 {
1426 int i, j;
1427 u32 pos = 0;
1428 struct a5xx_gpu_state *a5xx_state = container_of(state,
1429 struct a5xx_gpu_state, base);
1430
1431 if (IS_ERR_OR_NULL(state))
1432 return;
1433
1434 adreno_show(gpu, state, p);
1435
1436 /* Dump the additional a5xx HLSQ registers */
1437 if (!a5xx_state->hlsqregs)
1438 return;
1439
1440 drm_printf(p, "registers-hlsq:\n");
1441
1442 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1443 u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1444 u32 c = a5xx_hlsq_aperture_regs[i].count;
1445
1446 for (j = 0; j < c; j++, pos++, o++) {
1447 /*
1448 * To keep the crashdump simple we pull the entire range
1449 * for each register type but not all of the registers
1450 * in the range are valid. Fortunately invalid registers
1451 * stick out like a sore thumb with a value of
1452 * 0xdeadbeef
1453 */
1454 if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1455 continue;
1456
1457 drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n",
1458 o << 2, a5xx_state->hlsqregs[pos]);
1459 }
1460 }
1461 }
1462 #endif
1463
1464 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1465 {
1466 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1467 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1468
1469 return a5xx_gpu->cur_ring;
1470 }
1471
1472 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1473 {
1474 u64 busy_cycles, busy_time;
1475
1476 /* Only read the gpu busy if the hardware is already active */
1477 if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
1478 return 0;
1479
1480 busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1481 REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1482
1483 busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1484 do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
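/*
 * Dividing the cycle delta by the core clock in MHz leaves busy_time in
 * microseconds of busy time since the last sample.
 */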
1485
1486 gpu->devfreq.busy_cycles = busy_cycles;
1487
1488 pm_runtime_put(&gpu->pdev->dev);
1489
1490 if (WARN_ON(busy_time > ~0LU))
1491 return ~0LU;
1492
1493 return (unsigned long)busy_time;
1494 }
1495
1496 static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1497 {
1498 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1499 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1500
1501 if (a5xx_gpu->has_whereami)
1502 return a5xx_gpu->shadow[ring->id];
1503
1504 return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
1505 }
1506
1507 static const struct adreno_gpu_funcs funcs = {
1508 .base = {
1509 .get_param = adreno_get_param,
1510 .hw_init = a5xx_hw_init,
1511 .pm_suspend = a5xx_pm_suspend,
1512 .pm_resume = a5xx_pm_resume,
1513 .recover = a5xx_recover,
1514 .submit = a5xx_submit,
1515 .active_ring = a5xx_active_ring,
1516 .irq = a5xx_irq,
1517 .destroy = a5xx_destroy,
1518 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1519 .show = a5xx_show,
1520 #endif
1521 #if defined(CONFIG_DEBUG_FS)
1522 .debugfs_init = a5xx_debugfs_init,
1523 #endif
1524 .gpu_busy = a5xx_gpu_busy,
1525 .gpu_state_get = a5xx_gpu_state_get,
1526 .gpu_state_put = a5xx_gpu_state_put,
1527 .create_address_space = adreno_iommu_create_address_space,
1528 .get_rptr = a5xx_get_rptr,
1529 },
1530 .get_timestamp = a5xx_get_timestamp,
1531 };
1532
1533 static void check_speed_bin(struct device *dev)
1534 {
1535 struct nvmem_cell *cell;
1536 u32 val;
1537
1538 /*
1539 * If the OPP table specifies an opp-supported-hw property then we have
1540 * to set something with dev_pm_opp_set_supported_hw() or the table
1541 * doesn't get populated, so pick an arbitrary value that should
1542 * ensure the default frequencies are selected but not conflict with any
1543 * actual bins.
1544 */
1545 val = 0x80;
1546
1547 cell = nvmem_cell_get(dev, "speed_bin");
1548
1549 if (!IS_ERR(cell)) {
1550 void *buf = nvmem_cell_read(cell, NULL);
1551
1552 if (!IS_ERR(buf)) {
1553 u8 bin = *((u8 *) buf);
1554
1555 val = (1 << bin);
1556 kfree(buf);
1557 }
1558
1559 nvmem_cell_put(cell);
1560 }
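/*
 * For example, a fused speed bin of 2 yields val = 1 << 2 = 0x4, which only
 * matches OPP entries whose opp-supported-hw mask has bit 2 set; the 0x80
 * fallback above is simply a bit that no real bin is expected to use.
 */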
1561
1562 dev_pm_opp_set_supported_hw(dev, &val, 1);
1563 }
1564
1565 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1566 {
1567 struct msm_drm_private *priv = dev->dev_private;
1568 struct platform_device *pdev = priv->gpu_pdev;
1569 struct a5xx_gpu *a5xx_gpu = NULL;
1570 struct adreno_gpu *adreno_gpu;
1571 struct msm_gpu *gpu;
1572 unsigned int nr_rings;
1573 int ret;
1574
1575 if (!pdev) {
1576 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1577 return ERR_PTR(-ENXIO);
1578 }
1579
1580 a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1581 if (!a5xx_gpu)
1582 return ERR_PTR(-ENOMEM);
1583
1584 adreno_gpu = &a5xx_gpu->base;
1585 gpu = &adreno_gpu->base;
1586
1587 adreno_gpu->registers = a5xx_registers;
1588
1589 a5xx_gpu->lm_leakage = 0x4E001A;
1590
1591 check_speed_bin(&pdev->dev);
1592
1593 nr_rings = 4;
1594
1595 if (adreno_is_a510(adreno_gpu))
1596 nr_rings = 1;
1597
1598 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, nr_rings);
1599 if (ret) {
1600 a5xx_destroy(&(a5xx_gpu->base.base));
1601 return ERR_PTR(ret);
1602 }
1603
1604 if (gpu->aspace)
1605 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1606
1607 /* Set up the preemption specific bits and pieces for each ringbuffer */
1608 a5xx_preempt_init(gpu);
1609
1610 return gpu;
1611 }
1612