1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3  */
4 
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15 
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18 
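/*
 * Peripheral Authentication Service (PAS) id for the GPU, passed to the
 * secure world (via qcom_scm) when loading or resuming the zap shader.
 */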
19 #define GPU_PAS_ID 13
20 
21 void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
22 		bool sync)
23 {
24 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
25 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
26 	uint32_t wptr;
27 	unsigned long flags;
28 
29 	/*
30 	 * Most flush operations need to issue a WHERE_AM_I opcode to sync up
31 	 * the rptr shadow
32 	 */
33 	if (a5xx_gpu->has_whereami && sync) {
34 		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
35 		OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
36 		OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
37 	}
38 
39 	spin_lock_irqsave(&ring->preempt_lock, flags);
40 
41 	/* Copy the shadow to the actual register */
42 	ring->cur = ring->next;
43 
44 	/* Make sure to wrap wptr if we need to */
45 	wptr = get_wptr(ring);
46 
47 	spin_unlock_irqrestore(&ring->preempt_lock, flags);
48 
49 	/* Make sure everything is posted before making a decision */
50 	mb();
51 
52 	/* Update HW if this is the current ring and we are not in preempt */
53 	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
54 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
55 }
56 
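/*
 * "sudo" submit path: with CONFIG_DRM_MSM_GPU_SUDO enabled, submits flagged
 * with in_rb have their commands copied directly into the kernel ringbuffer
 * instead of being executed as indirect buffers, so they run with the RB's
 * (privileged) permissions.  Intended for debugging only.
 */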
57 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
58 {
59 	struct msm_drm_private *priv = gpu->dev->dev_private;
60 	struct msm_ringbuffer *ring = submit->ring;
61 	struct msm_gem_object *obj;
62 	uint32_t *ptr, dwords;
63 	unsigned int i, j;
64 
65 	for (i = 0; i < submit->nr_cmds; i++) {
66 		switch (submit->cmd[i].type) {
67 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
68 			break;
69 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
70 			if (priv->lastctx == submit->queue->ctx)
71 				break;
72 			fallthrough;
73 		case MSM_SUBMIT_CMD_BUF:
74 			/* copy commands into RB: */
75 			obj = submit->bos[submit->cmd[i].idx].obj;
76 			dwords = submit->cmd[i].size;
77 
78 			ptr = msm_gem_get_vaddr(&obj->base);
79 
80 			/* _get_vaddr() shouldn't fail at this point,
81 			 * since we've already mapped it once in
82 			 * submit_reloc()
83 			 */
84 			if (WARN_ON(IS_ERR_OR_NULL(ptr)))
85 				return;
86 
87 			for (j = 0; j < dwords; j++) {
88 				/* normally the OUT_PKTn() would wait
89 				 * for space for the packet.  But since
90 				 * we just OUT_RING() the whole thing,
91 				 * need to call adreno_wait_ring()
92 				 * ourselves:
93 				 */
94 				adreno_wait_ring(ring, 1);
95 				OUT_RING(ring, ptr[j]);
96 			}
97 
98 			msm_gem_put_vaddr(&obj->base);
99 
100 			break;
101 		}
102 	}
103 
104 	a5xx_flush(gpu, ring, true);
105 	a5xx_preempt_trigger(gpu);
106 
107 	/* we might not necessarily have a cmd from userspace to
108 	 * trigger an event to know that submit has completed, so
109 	 * do this manually:
110 	 */
111 	a5xx_idle(gpu, ring);
112 	ring->memptrs->fence = submit->seqno;
113 	msm_gpu_retire(gpu);
114 }
115 
116 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
117 {
118 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
119 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
120 	struct msm_drm_private *priv = gpu->dev->dev_private;
121 	struct msm_ringbuffer *ring = submit->ring;
122 	unsigned int i, ibs = 0;
123 
124 	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
125 		priv->lastctx = NULL;
126 		a5xx_submit_in_rb(gpu, submit);
127 		return;
128 	}
129 
130 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
131 	OUT_RING(ring, 0x02);
132 
133 	/* Turn off protected mode to write to special registers */
134 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
135 	OUT_RING(ring, 0);
136 
137 	/* Set the save preemption record for the ring/command */
138 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
139 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
140 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
141 
142 	/* Turn back on protected mode */
143 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
144 	OUT_RING(ring, 1);
145 
146 	/* Enable local preemption for finegrain preemption */
147 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
148 	OUT_RING(ring, 0x1);
149 
150 	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
151 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
152 	OUT_RING(ring, 0x02);
153 
154 	/* Submit the commands */
155 	for (i = 0; i < submit->nr_cmds; i++) {
156 		switch (submit->cmd[i].type) {
157 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
158 			break;
159 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
160 			if (priv->lastctx == submit->queue->ctx)
161 				break;
162 			fallthrough;
163 		case MSM_SUBMIT_CMD_BUF:
164 			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
165 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
166 			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
167 			OUT_RING(ring, submit->cmd[i].size);
168 			ibs++;
169 			break;
170 		}
171 	}
172 
173 	/*
174 	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
175 	 * are done rendering - otherwise a lucky preemption would start
176 	 * replaying from the last checkpoint
177 	 */
178 	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
179 	OUT_RING(ring, 0);
180 	OUT_RING(ring, 0);
181 	OUT_RING(ring, 0);
182 	OUT_RING(ring, 0);
183 	OUT_RING(ring, 0);
184 
185 	/* Turn off IB level preemptions */
186 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
187 	OUT_RING(ring, 0x01);
188 
189 	/* Write the fence to the scratch register */
190 	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
191 	OUT_RING(ring, submit->seqno);
192 
193 	/*
194 	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
195 	 * timestamp is written to the memory and then triggers the interrupt
196 	 */
197 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
198 	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
199 		CP_EVENT_WRITE_0_IRQ);
200 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
201 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
202 	OUT_RING(ring, submit->seqno);
203 
204 	/* Yield the floor on command completion */
205 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
206 	/*
207 	 * If dword[2:1] are non zero, they specify an address for the CP to
208 	 * write the value of dword[3] to on preemption complete. Write 0 to
209 	 * skip the write
210 	 */
211 	OUT_RING(ring, 0x00);
212 	OUT_RING(ring, 0x00);
213 	/* Data value - not used if the address above is 0 */
214 	OUT_RING(ring, 0x01);
215 	/* Set bit 0 to trigger an interrupt on preempt complete */
216 	OUT_RING(ring, 0x01);
217 
218 	/* A WHERE_AM_I packet is not needed after a YIELD */
219 	a5xx_flush(gpu, ring, false);
220 
221 	/* Check to see if we need to start preemption */
222 	a5xx_preempt_trigger(gpu);
223 }
224 
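/*
 * Per-block hardware clock gating (HWCG) register settings.  a5xx_set_hwcg()
 * below writes these values to enable clock gating, or zero to disable it.
 */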
225 static const struct {
226 	u32 offset;
227 	u32 value;
228 } a5xx_hwcg[] = {
229 	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
230 	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
231 	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
232 	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
233 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
234 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
235 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
236 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
237 	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
238 	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
239 	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
240 	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
241 	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
242 	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
243 	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
244 	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
245 	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
246 	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
247 	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
248 	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
249 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
250 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
251 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
252 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
253 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
254 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
255 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
256 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
257 	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
258 	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
259 	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
260 	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
261 	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
262 	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
263 	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
264 	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
265 	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
266 	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
267 	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
268 	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
269 	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
270 	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
271 	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
272 	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
273 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
274 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
275 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
276 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
277 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
278 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
279 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
280 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
281 	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
282 	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
283 	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
284 	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
285 	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
286 	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
287 	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
288 	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
289 	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
290 	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
291 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
292 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
293 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
294 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
295 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
296 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
297 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
298 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
299 	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
300 	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
301 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
302 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
303 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
304 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
305 	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
306 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
307 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
308 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
309 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
310 	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
311 	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
312 	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
313 	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
314 	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
315 	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
316 	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
317 	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
318 	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
319 	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
320 	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
321 };
322 
323 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
324 {
325 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
326 	unsigned int i;
327 
328 	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
329 		gpu_write(gpu, a5xx_hwcg[i].offset,
330 			state ? a5xx_hwcg[i].value : 0);
331 
332 	if (adreno_is_a540(adreno_gpu)) {
333 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
334 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
335 	}
336 
337 	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
338 	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
339 }
340 
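/*
 * Initialize the CP microengine with a CP_ME_INIT packet once it has been
 * taken out of halt; the packet payload selects hardware contexts, error
 * detection and per-revision microcode workarounds.
 */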
341 static int a5xx_me_init(struct msm_gpu *gpu)
342 {
343 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
344 	struct msm_ringbuffer *ring = gpu->rb[0];
345 
346 	OUT_PKT7(ring, CP_ME_INIT, 8);
347 
348 	OUT_RING(ring, 0x0000002F);
349 
350 	/* Enable multiple hardware contexts */
351 	OUT_RING(ring, 0x00000003);
352 
353 	/* Enable error detection */
354 	OUT_RING(ring, 0x20000000);
355 
356 	/* Don't enable header dump */
357 	OUT_RING(ring, 0x00000000);
358 	OUT_RING(ring, 0x00000000);
359 
360 	/* Specify workarounds for various microcode issues */
361 	if (adreno_is_a530(adreno_gpu)) {
362 		/* Workaround for token end syncs
363 		 * Force a WFI after every direct-render 3D mode draw and every
364 		 * 2D mode 3 draw
365 		 */
366 		OUT_RING(ring, 0x0000000B);
367 	} else if (adreno_is_a510(adreno_gpu)) {
368 		/* Workaround for token and syncs */
369 		OUT_RING(ring, 0x00000001);
370 	} else {
371 		/* No workarounds enabled */
372 		OUT_RING(ring, 0x00000000);
373 	}
374 
375 	OUT_RING(ring, 0x00000000);
376 	OUT_RING(ring, 0x00000000);
377 
378 	a5xx_flush(gpu, ring, true);
379 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
380 }
381 
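/*
 * Prime ring 0 for preemption: program its preemption save record and issue
 * an initial yield so the CP is ready to switch rings.  Skipped when only a
 * single ring is configured, since preemption is disabled in that case.
 */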
382 static int a5xx_preempt_start(struct msm_gpu *gpu)
383 {
384 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
385 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
386 	struct msm_ringbuffer *ring = gpu->rb[0];
387 
388 	if (gpu->nr_rings == 1)
389 		return 0;
390 
391 	/* Turn off protected mode to write to special registers */
392 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
393 	OUT_RING(ring, 0);
394 
395 	/* Set the save preemption record for the ring/command */
396 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
397 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
398 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
399 
400 	/* Turn back on protected mode */
401 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
402 	OUT_RING(ring, 1);
403 
404 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
405 	OUT_RING(ring, 0x00);
406 
407 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
408 	OUT_RING(ring, 0x01);
409 
410 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
411 	OUT_RING(ring, 0x01);
412 
413 	/* Yield the floor on command completion */
414 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
415 	OUT_RING(ring, 0x00);
416 	OUT_RING(ring, 0x00);
417 	OUT_RING(ring, 0x01);
418 	OUT_RING(ring, 0x01);
419 
420 	/* The WHERE_AM_I packet is not needed after a YIELD is issued */
421 	a5xx_flush(gpu, ring, false);
422 
423 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
424 }
425 
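/*
 * Check whether the (patched) PFP microcode supports the CP_WHERE_AM_I
 * opcode, which is needed to keep the RPTR shadow up to date and therefore
 * to safely enable preemption (see a5xx_hw_init() below).
 */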
426 static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
427 		struct drm_gem_object *obj)
428 {
429 	u32 *buf = msm_gem_get_vaddr_active(obj);
430 
431 	if (IS_ERR(buf))
432 		return;
433 
434 	/*
435 	 * If the lowest nibble is 0xa that is an indication that this microcode
436 	 * has been patched. The actual version is in dword [3] but we only care
437 	 * about the patchlevel which is the lowest nibble of dword [3]
438 	 */
439 	if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
440 		a5xx_gpu->has_whereami = true;
441 
442 	msm_gem_put_vaddr(obj);
443 }
444 
445 static int a5xx_ucode_init(struct msm_gpu *gpu)
446 {
447 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
448 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
449 	int ret;
450 
451 	if (!a5xx_gpu->pm4_bo) {
452 		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
453 			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
454 
455 
456 		if (IS_ERR(a5xx_gpu->pm4_bo)) {
457 			ret = PTR_ERR(a5xx_gpu->pm4_bo);
458 			a5xx_gpu->pm4_bo = NULL;
459 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
460 				ret);
461 			return ret;
462 		}
463 
464 		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
465 	}
466 
467 	if (!a5xx_gpu->pfp_bo) {
468 		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
469 			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
470 
471 		if (IS_ERR(a5xx_gpu->pfp_bo)) {
472 			ret = PTR_ERR(a5xx_gpu->pfp_bo);
473 			a5xx_gpu->pfp_bo = NULL;
474 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
475 				ret);
476 			return ret;
477 		}
478 
479 		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
480 		a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
481 	}
482 
483 	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
484 		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
485 
486 	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
487 		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
488 
489 	return 0;
490 }
491 
492 #define SCM_GPU_ZAP_SHADER_RESUME 0
493 
494 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
495 {
496 	int ret;
497 
498 	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
499 	if (ret)
500 		DRM_ERROR("%s: zap-shader resume failed: %d\n",
501 			gpu->name, ret);
502 
503 	return ret;
504 }
505 
506 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
507 {
508 	static bool loaded;
509 	int ret;
510 
511 	/*
512 	 * If the zap shader is already loaded into memory we just need to kick
513 	 * the remote processor to reinitialize it
514 	 */
515 	if (loaded)
516 		return a5xx_zap_shader_resume(gpu);
517 
518 	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
519 
520 	loaded = !ret;
521 	return ret;
522 }
523 
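/*
 * RBBM interrupt sources left unmasked during normal operation; everything
 * else stays masked (see a5xx_hw_init() and a5xx_irq()).
 */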
524 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
525 	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
526 	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
527 	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
528 	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
529 	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
530 	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
531 	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
532 	  A5XX_RBBM_INT_0_MASK_CP_SW | \
533 	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
534 	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
535 	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
536 
537 static int a5xx_hw_init(struct msm_gpu *gpu)
538 {
539 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
540 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
541 	int ret;
542 
543 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
544 
545 	if (adreno_is_a540(adreno_gpu))
546 		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
547 
548 	/* Make all blocks contribute to the GPU BUSY perf counter */
549 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
550 
551 	/* Enable RBBM error reporting bits */
552 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
553 
554 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
555 		/*
556 		 * Mask out the activity signals from RB1-3 to avoid false
557 		 * positives
558 		 */
559 
560 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
561 			0xF0000000);
562 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
563 			0xFFFFFFFF);
564 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
565 			0xFFFFFFFF);
566 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
567 			0xFFFFFFFF);
568 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
569 			0xFFFFFFFF);
570 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
571 			0xFFFFFFFF);
572 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
573 			0xFFFFFFFF);
574 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
575 			0xFFFFFFFF);
576 	}
577 
578 	/* Enable fault detection */
579 	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
580 		(1 << 30) | 0xFFFF);
581 
582 	/* Turn on performance counters */
583 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
584 
585 	/* Select CP0 to always count cycles */
586 	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
587 
588 	/* Select countable 6 for RBBM counter 0 to get the busy status for devfreq */
589 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
590 
591 	/* Increase VFD cache access so LRZ and other data gets evicted less */
592 	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
593 
594 	/* Disable L2 bypass in the UCHE */
595 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
596 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
597 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
598 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
599 
600 	/* Set the GMEM VA range (0 to gpu->gmem) */
601 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
602 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
603 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
604 		0x00100000 + adreno_gpu->gmem - 1);
605 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
606 
607 	if (adreno_is_a510(adreno_gpu)) {
608 		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
609 		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
610 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
611 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
612 		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
613 			  (0x200 << 11 | 0x200 << 22));
614 	} else {
615 		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
616 		if (adreno_is_a530(adreno_gpu))
617 			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
618 		if (adreno_is_a540(adreno_gpu))
619 			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
620 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
621 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
622 		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
623 			  (0x400 << 11 | 0x300 << 22));
624 	}
625 
626 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
627 		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
628 
629 	/* Enable USE_RETENTION_FLOPS */
630 	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
631 
632 	/* Enable ME/PFP split notification */
633 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
634 
635 	/*
636 	 *  In A5x, the CCU can send the context_done event of a particular
637 	 *  context to the UCHE, which ultimately reaches the CP even when there
638 	 *  is still a valid transaction of that context inside the CCU. This can
639 	 *  let the CP program config registers, which makes the "valid
640 	 *  transaction" inside the CCU be interpreted differently and can cause
641 	 *  a GPU fault. This bug is fixed in the latest A510 revision. To enable
642 	 *  the fix, bit[11] of RB_DBG_ECO_CNTL needs to be set to 0; the default
643 	 *  is 1 (disabled). On older A510 revisions this bit is unused.
644 	 */
645 	if (adreno_is_a510(adreno_gpu))
646 		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
647 
648 	/* Enable HWCG */
649 	a5xx_set_hwcg(gpu, true);
650 
651 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
652 
653 	/* Set the highest bank bit */
654 	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
655 	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
656 	if (adreno_is_a540(adreno_gpu))
657 		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
658 
659 	/* Protect registers from the CP */
660 	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
661 
662 	/* RBBM */
663 	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
664 	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
665 	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
666 	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
667 	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
668 	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
669 
670 	/* Content protect */
671 	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
672 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
673 			16));
674 	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
675 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
676 
677 	/* CP */
678 	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
679 	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
680 	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
681 	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
682 
683 	/* RB */
684 	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
685 	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
686 
687 	/* VPC */
688 	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
689 	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
690 
691 	/* UCHE */
692 	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
693 
694 	if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
695 		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
696 			ADRENO_PROTECT_RW(0x10000, 0x8000));
697 
698 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
699 	/*
700 	 * Disable the trusted memory range - we don't actually support secure
701 	 * memory rendering at this point in time and we don't want to block off
702 	 * part of the virtual memory space.
703 	 */
704 	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
705 		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
706 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
707 
708 	/* Put the GPU into 64 bit by default */
709 	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
710 	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
711 	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
712 	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
713 	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
714 	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
715 	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
716 	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
717 	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
718 	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
719 	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
720 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
721 
722 	/*
723 	 * VPC corner case with local memory load kill leads to corrupt
724 	 * internal state. Normal Disable does not work for all a5x chips.
725 	 * So do the following setting to disable it.
726 	 */
727 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
728 		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
729 		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
730 	}
731 
732 	ret = adreno_hw_init(gpu);
733 	if (ret)
734 		return ret;
735 
736 	if (!adreno_is_a510(adreno_gpu))
737 		a5xx_gpmu_ucode_init(gpu);
738 
739 	ret = a5xx_ucode_init(gpu);
740 	if (ret)
741 		return ret;
742 
743 	/* Set the ringbuffer address */
744 	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
745 		gpu->rb[0]->iova);
746 
747 	/*
748 	 * If the microcode supports the WHERE_AM_I opcode then we can use that
749 	 * in lieu of the RPTR shadow and enable preemption. Otherwise, we
750 	 * can't safely use the RPTR shadow or preemption. In either case, the
751 	 * RPTR shadow should be disabled in hardware.
752 	 */
753 	gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
754 		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
755 
756 	/* Create a privileged buffer for the RPTR shadow */
757 	if (a5xx_gpu->has_whereami) {
758 		if (!a5xx_gpu->shadow_bo) {
759 			a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
760 				sizeof(u32) * gpu->nr_rings,
761 				MSM_BO_UNCACHED | MSM_BO_MAP_PRIV,
762 				gpu->aspace, &a5xx_gpu->shadow_bo,
763 				&a5xx_gpu->shadow_iova);
764 
765 			if (IS_ERR(a5xx_gpu->shadow))
766 				return PTR_ERR(a5xx_gpu->shadow);
767 		}
768 
769 		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
770 			REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
771 	} else if (gpu->nr_rings > 1) {
772 		/* Disable preemption if WHERE_AM_I isn't available */
773 		a5xx_preempt_fini(gpu);
774 		gpu->nr_rings = 1;
775 	}
776 
777 	a5xx_preempt_hw_init(gpu);
778 
779 	/* Enable the RBBM interrupts listed in A5XX_INT_MASK */
780 	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
781 
782 	/* Clear ME_HALT to start the micro engine */
783 	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
784 	ret = a5xx_me_init(gpu);
785 	if (ret)
786 		return ret;
787 
788 	ret = a5xx_power_init(gpu);
789 	if (ret)
790 		return ret;
791 
792 	/*
793 	 * Send a pipeline event stat to get misbehaving counters to start
794 	 * ticking correctly
795 	 */
796 	if (adreno_is_a530(adreno_gpu)) {
797 		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
798 		OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
799 
800 		a5xx_flush(gpu, gpu->rb[0], true);
801 		if (!a5xx_idle(gpu, gpu->rb[0]))
802 			return -EINVAL;
803 	}
804 
805 	/*
806 	 * If the chip we are using supports it, try to load a zap shader into
807 	 * the secure world. If successful we can use the CP to switch out of
808 	 * secure mode. If not, we have no recourse but to try to switch
809 	 * ourselves out manually. If we guessed wrong then access to the
810 	 * RBBM_SECVID_TRUST_CNTL register will be blocked and a permissions
811 	 * violation will soon follow.
812 	 */
813 	ret = a5xx_zap_shader_init(gpu);
814 	if (!ret) {
815 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
816 		OUT_RING(gpu->rb[0], 0x00000000);
817 
818 		a5xx_flush(gpu, gpu->rb[0], true);
819 		if (!a5xx_idle(gpu, gpu->rb[0]))
820 			return -EINVAL;
821 	} else if (ret == -ENODEV) {
822 		/*
823 		 * This device does not use zap shader (but print a warning
824 		 * just in case someone got their dt wrong.. hopefully they
825 		 * have a debug UART to realize the error of their ways...
826 		 * if you mess this up you are about to crash horribly)
827 		 */
828 		dev_warn_once(gpu->dev->dev,
829 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
830 		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
831 	} else {
832 		return ret;
833 	}
834 
835 	/* Last step - yield the ringbuffer */
836 	a5xx_preempt_start(gpu);
837 
838 	return 0;
839 }
840 
841 static void a5xx_recover(struct msm_gpu *gpu)
842 {
843 	int i;
844 
845 	adreno_dump_info(gpu);
846 
847 	for (i = 0; i < 8; i++) {
848 		printk("CP_SCRATCH_REG%d: %u\n", i,
849 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
850 	}
851 
852 	if (hang_debug)
853 		a5xx_dump(gpu);
854 
855 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
856 	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
857 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
858 	adreno_recover(gpu);
859 }
860 
861 static void a5xx_destroy(struct msm_gpu *gpu)
862 {
863 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
864 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
865 
866 	DBG("%s", gpu->name);
867 
868 	a5xx_preempt_fini(gpu);
869 
870 	if (a5xx_gpu->pm4_bo) {
871 		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
872 		drm_gem_object_put(a5xx_gpu->pm4_bo);
873 	}
874 
875 	if (a5xx_gpu->pfp_bo) {
876 		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
877 		drm_gem_object_put(a5xx_gpu->pfp_bo);
878 	}
879 
880 	if (a5xx_gpu->gpmu_bo) {
881 		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
882 		drm_gem_object_put(a5xx_gpu->gpmu_bo);
883 	}
884 
885 	if (a5xx_gpu->shadow_bo) {
886 		msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
887 		drm_gem_object_put(a5xx_gpu->shadow_bo);
888 	}
889 
890 	adreno_gpu_cleanup(adreno_gpu);
891 	kfree(a5xx_gpu);
892 }
893 
894 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
895 {
896 	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
897 		return false;
898 
899 	/*
900 	 * Nearly every abnormality ends up pausing the GPU and triggering a
901 	 * fault so we can safely just watch for this one interrupt to fire
902 	 */
903 	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
904 		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
905 }
906 
907 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
908 {
909 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
910 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
911 
912 	if (ring != a5xx_gpu->cur_ring) {
913 		WARN(1, "Tried to idle a non-current ringbuffer\n");
914 		return false;
915 	}
916 
917 	/* wait for CP to drain ringbuffer: */
918 	if (!adreno_idle(gpu, ring))
919 		return false;
920 
921 	if (spin_until(_a5xx_check_idle(gpu))) {
922 		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
923 			gpu->name, __builtin_return_address(0),
924 			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
925 			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
926 			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
927 			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
928 		return false;
929 	}
930 
931 	return true;
932 }
933 
934 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
935 {
936 	struct msm_gpu *gpu = arg;
937 	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
938 			iova, flags,
939 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
940 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
941 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
942 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
943 
944 	return -EFAULT;
945 }
946 
947 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
948 {
949 	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
950 
951 	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
952 		u32 val;
953 
954 		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
955 
956 		/*
957 		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
958 		 * read it twice
959 		 */
960 
961 		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
962 		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
963 
964 		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
965 			val);
966 	}
967 
968 	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
969 		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
970 			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
971 
972 	if (status & A5XX_CP_INT_CP_DMA_ERROR)
973 		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
974 
975 	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
976 		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
977 
978 		dev_err_ratelimited(gpu->dev->dev,
979 			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
980 			val & (1 << 24) ? "WRITE" : "READ",
981 			(val & 0xFFFFF) >> 2, val);
982 	}
983 
984 	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
985 		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
986 		const char *access[16] = { "reserved", "reserved",
987 			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
988 			"", "", "me read", "me write", "", "", "crashdump read",
989 			"crashdump write" };
990 
991 		dev_err_ratelimited(gpu->dev->dev,
992 			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
993 			status & 0xFFFFF, access[(status >> 24) & 0xF],
994 			(status & (1 << 31)), status);
995 	}
996 }
997 
998 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
999 {
1000 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
1001 		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
1002 
1003 		dev_err_ratelimited(gpu->dev->dev,
1004 			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
1005 			val & (1 << 28) ? "WRITE" : "READ",
1006 			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
1007 			(val >> 24) & 0xF);
1008 
1009 		/* Clear the error */
1010 		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
1011 
1012 		/* Clear the interrupt */
1013 		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1014 			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1015 	}
1016 
1017 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
1018 		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
1019 
1020 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
1021 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
1022 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
1023 
1024 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
1025 		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
1026 			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
1027 
1028 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
1029 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
1030 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
1031 
1032 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1033 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
1034 
1035 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1036 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
1037 }
1038 
1039 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
1040 {
1041 	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
1042 
1043 	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
1044 
1045 	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
1046 		addr);
1047 }
1048 
1049 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
1050 {
1051 	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
1052 }
1053 
1054 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
1055 {
1056 	struct drm_device *dev = gpu->dev;
1057 	struct msm_drm_private *priv = dev->dev_private;
1058 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1059 
1060 	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1061 		ring ? ring->id : -1, ring ? ring->seqno : 0,
1062 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1063 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1064 		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1065 		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1066 		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1067 		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1068 		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1069 
1070 	/* Turn off the hangcheck timer to keep it from bothering us */
1071 	del_timer(&gpu->hangcheck_timer);
1072 
1073 	queue_work(priv->wq, &gpu->recover_work);
1074 }
1075 
1076 #define RBBM_ERROR_MASK \
1077 	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1078 	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1079 	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1080 	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1081 	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1082 	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1083 
1084 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1085 {
1086 	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1087 
1088 	/*
1089 	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1090 	 * before the source is cleared the interrupt will storm.
1091 	 */
1092 	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1093 		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1094 
1095 	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1096 	if (status & RBBM_ERROR_MASK)
1097 		a5xx_rbbm_err_irq(gpu, status);
1098 
1099 	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1100 		a5xx_cp_err_irq(gpu);
1101 
1102 	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1103 		a5xx_fault_detect_irq(gpu);
1104 
1105 	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1106 		a5xx_uche_err_irq(gpu);
1107 
1108 	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1109 		a5xx_gpmu_err_irq(gpu);
1110 
1111 	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1112 		a5xx_preempt_trigger(gpu);
1113 		msm_gpu_retire(gpu);
1114 	}
1115 
1116 	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1117 		a5xx_preempt_irq(gpu);
1118 
1119 	return IRQ_HANDLED;
1120 }
1121 
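/*
 * Pairs of (start, end) register offsets that are dumped for debugging and
 * GPU state capture; the list is terminated with ~0.
 */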
1122 static const u32 a5xx_registers[] = {
1123 	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1124 	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1125 	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1126 	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1127 	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1128 	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1129 	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1130 	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1131 	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1132 	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1133 	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1134 	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1135 	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1136 	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1137 	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1138 	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1139 	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1140 	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1141 	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1142 	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1143 	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1144 	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1145 	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1146 	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1147 	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1148 	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1149 	0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1150 	0xAC60, 0xAC60, ~0,
1151 };
1152 
1153 static void a5xx_dump(struct msm_gpu *gpu)
1154 {
1155 	DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1156 		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1157 	adreno_dump(gpu);
1158 }
1159 
1160 static int a5xx_pm_resume(struct msm_gpu *gpu)
1161 {
1162 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1163 	int ret;
1164 
1165 	/* Turn on the core power */
1166 	ret = msm_gpu_pm_resume(gpu);
1167 	if (ret)
1168 		return ret;
1169 
1170 	if (adreno_is_a510(adreno_gpu)) {
1171 		/* Halt the sp_input_clk at HM level */
1172 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1173 		a5xx_set_hwcg(gpu, true);
1174 		/* Turn on sp_input_clk at HM level */
1175 		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1176 		return 0;
1177 	}
1178 
1179 	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
1180 	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1181 
1182 	/* Wait 3 usecs before polling */
1183 	udelay(3);
1184 
1185 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1186 		(1 << 20), (1 << 20));
1187 	if (ret) {
1188 		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1189 			gpu->name,
1190 			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1191 		return ret;
1192 	}
1193 
1194 	/* Turn on the SP domain */
1195 	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1196 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1197 		(1 << 20), (1 << 20));
1198 	if (ret)
1199 		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1200 			gpu->name);
1201 
1202 	return ret;
1203 }
1204 
1205 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1206 {
1207 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1208 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1209 	u32 mask = 0xf;
1210 	int i, ret;
1211 
1212 	/* A510 has 3 XIN ports in VBIF */
1213 	if (adreno_is_a510(adreno_gpu))
1214 		mask = 0x7;
1215 
1216 	/* Clear the VBIF pipe before shutting down */
1217 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1218 	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1219 				mask) == mask);
1220 
1221 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1222 
1223 	/*
1224 	 * Reset the VBIF before power collapse to avoid issue with FIFO
1225 	 * entries
1226 	 */
1227 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1228 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1229 
1230 	ret = msm_gpu_pm_suspend(gpu);
1231 	if (ret)
1232 		return ret;
1233 
1234 	if (a5xx_gpu->has_whereami)
1235 		for (i = 0; i < gpu->nr_rings; i++)
1236 			a5xx_gpu->shadow[i] = 0;
1237 
1238 	return 0;
1239 }
1240 
1241 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1242 {
1243 	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
1244 		REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
1245 
1246 	return 0;
1247 }
1248 
1249 struct a5xx_crashdumper {
1250 	void *ptr;
1251 	struct drm_gem_object *bo;
1252 	u64 iova;
1253 };
1254 
1255 struct a5xx_gpu_state {
1256 	struct msm_gpu_state base;
1257 	u32 *hlsqregs;
1258 };
1259 
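/*
 * The CP "crashdumper" executes a small script out of GPU memory: each entry
 * names a destination address in the dump buffer plus a register offset and
 * count, and the CP copies those registers into the buffer.  It is used here
 * to capture registers behind the HLSQ aperture that are not directly
 * readable from the CPU on secure platforms (see the table below).
 */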
1260 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1261 		struct a5xx_crashdumper *dumper)
1262 {
1263 	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1264 		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1265 		&dumper->bo, &dumper->iova);
1266 
1267 	if (!IS_ERR(dumper->ptr))
1268 		msm_gem_object_set_name(dumper->bo, "crashdump");
1269 
1270 	return PTR_ERR_OR_ZERO(dumper->ptr);
1271 }
1272 
1273 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1274 		struct a5xx_crashdumper *dumper)
1275 {
1276 	u32 val;
1277 
1278 	if (IS_ERR_OR_NULL(dumper->ptr))
1279 		return -EINVAL;
1280 
1281 	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1282 		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1283 
1284 	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1285 
1286 	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1287 		val & 0x04, 100, 10000);
1288 }
1289 
1290 /*
1291  * This is the list of registers that need to be read through the HLSQ
1292  * aperture via the crashdumper.  These are not nominally accessible from
1293  * the CPU on a secure platform.
1294  */
1295 static const struct {
1296 	u32 type;
1297 	u32 regoffset;
1298 	u32 count;
1299 } a5xx_hlsq_aperture_regs[] = {
1300 	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1301 	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1302 	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1303 	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1304 	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1305 	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1306 	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1307 	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1308 	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1309 	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1310 	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1311 	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1312 	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1313 	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1314 	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1315 };
1316 
1317 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1318 		struct a5xx_gpu_state *a5xx_state)
1319 {
1320 	struct a5xx_crashdumper dumper = { 0 };
1321 	u32 offset, count = 0;
1322 	u64 *ptr;
1323 	int i;
1324 
1325 	if (a5xx_crashdumper_init(gpu, &dumper))
1326 		return;
1327 
1328 	/* The script will be written at offset 0 */
1329 	ptr = dumper.ptr;
1330 
1331 	/* Start writing the data at offset 256k */
1332 	offset = dumper.iova + (256 * SZ_1K);
1333 
1334 	/* Count how many additional registers to get from the HLSQ aperture */
1335 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1336 		count += a5xx_hlsq_aperture_regs[i].count;
1337 
1338 	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1339 	if (!a5xx_state->hlsqregs)
1340 		return;
1341 
1342 	/* Build the crashdump script */
1343 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1344 		u32 type = a5xx_hlsq_aperture_regs[i].type;
1345 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1346 
1347 		/* Write the register to select the desired bank */
1348 		*ptr++ = ((u64) type << 8);
1349 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1350 			(1 << 21) | 1;
1351 
1352 		*ptr++ = offset;
1353 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1354 			| c;
1355 
1356 		offset += c * sizeof(u32);
1357 	}
1358 
1359 	/* Write two zeros to close off the script */
1360 	*ptr++ = 0;
1361 	*ptr++ = 0;
1362 
1363 	if (a5xx_crashdumper_run(gpu, &dumper)) {
1364 		kfree(a5xx_state->hlsqregs);
1365 		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1366 		return;
1367 	}
1368 
1369 	/* Copy the data from the crashdumper to the state */
1370 	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1371 		count * sizeof(u32));
1372 
1373 	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1374 }
1375 
1376 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1377 {
1378 	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1379 			GFP_KERNEL);
1380 
1381 	if (!a5xx_state)
1382 		return ERR_PTR(-ENOMEM);
1383 
1384 	/* Temporarily disable hardware clock gating before reading the hw */
1385 	a5xx_set_hwcg(gpu, false);
1386 
1387 	/* First get the generic state from the adreno core */
1388 	adreno_gpu_state_get(gpu, &(a5xx_state->base));
1389 
1390 	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1391 
1392 	/* Get the HLSQ regs with the help of the crashdumper */
1393 	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1394 
1395 	a5xx_set_hwcg(gpu, true);
1396 
1397 	return &a5xx_state->base;
1398 }
1399 
1400 static void a5xx_gpu_state_destroy(struct kref *kref)
1401 {
1402 	struct msm_gpu_state *state = container_of(kref,
1403 		struct msm_gpu_state, ref);
1404 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1405 		struct a5xx_gpu_state, base);
1406 
1407 	kfree(a5xx_state->hlsqregs);
1408 
1409 	adreno_gpu_state_destroy(state);
1410 	kfree(a5xx_state);
1411 }
1412 
1413 static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1414 {
1415 	if (IS_ERR_OR_NULL(state))
1416 		return 1;
1417 
1418 	return kref_put(&state->ref, a5xx_gpu_state_destroy);
1419 }
1420 
1421 
1422 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1423 static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1424 		      struct drm_printer *p)
1425 {
1426 	int i, j;
1427 	u32 pos = 0;
1428 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1429 		struct a5xx_gpu_state, base);
1430 
1431 	if (IS_ERR_OR_NULL(state))
1432 		return;
1433 
1434 	adreno_show(gpu, state, p);
1435 
1436 	/* Dump the additional a5xx HLSQ registers */
1437 	if (!a5xx_state->hlsqregs)
1438 		return;
1439 
1440 	drm_printf(p, "registers-hlsq:\n");
1441 
1442 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1443 		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1444 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1445 
1446 		for (j = 0; j < c; j++, pos++, o++) {
1447 			/*
1448 			 * To keep the crashdump simple we pull the entire range
1449 			 * for each register type but not all of the registers
1450 			 * in the range are valid. Fortunately invalid registers
1451 			 * stick out like a sore thumb with a value of
1452 			 * 0xdeadbeef
1453 			 */
1454 			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1455 				continue;
1456 
1457 			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1458 				o << 2, a5xx_state->hlsqregs[pos]);
1459 		}
1460 	}
1461 }
1462 #endif
1463 
1464 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1465 {
1466 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1467 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1468 
1469 	return a5xx_gpu->cur_ring;
1470 }
1471 
1472 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1473 {
1474 	u64 busy_cycles, busy_time;
1475 
1476 	/* Only read the gpu busy if the hardware is already active */
1477 	if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
1478 		return 0;
1479 
1480 	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1481 			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1482 
1483 	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1484 	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1485 
1486 	gpu->devfreq.busy_cycles = busy_cycles;
1487 
1488 	pm_runtime_put(&gpu->pdev->dev);
1489 
1490 	if (WARN_ON(busy_time > ~0LU))
1491 		return ~0LU;
1492 
1493 	return (unsigned long)busy_time;
1494 }
1495 
1496 static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1497 {
1498 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1499 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1500 
1501 	if (a5xx_gpu->has_whereami)
1502 		return a5xx_gpu->shadow[ring->id];
1503 
1504 	return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
1505 }
1506 
1507 static const struct adreno_gpu_funcs funcs = {
1508 	.base = {
1509 		.get_param = adreno_get_param,
1510 		.hw_init = a5xx_hw_init,
1511 		.pm_suspend = a5xx_pm_suspend,
1512 		.pm_resume = a5xx_pm_resume,
1513 		.recover = a5xx_recover,
1514 		.submit = a5xx_submit,
1515 		.active_ring = a5xx_active_ring,
1516 		.irq = a5xx_irq,
1517 		.destroy = a5xx_destroy,
1518 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1519 		.show = a5xx_show,
1520 #endif
1521 #if defined(CONFIG_DEBUG_FS)
1522 		.debugfs_init = a5xx_debugfs_init,
1523 #endif
1524 		.gpu_busy = a5xx_gpu_busy,
1525 		.gpu_state_get = a5xx_gpu_state_get,
1526 		.gpu_state_put = a5xx_gpu_state_put,
1527 		.create_address_space = adreno_iommu_create_address_space,
1528 		.get_rptr = a5xx_get_rptr,
1529 	},
1530 	.get_timestamp = a5xx_get_timestamp,
1531 };
1532 
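/*
 * Read the GPU speed-bin fuse (if a "speed_bin" nvmem cell is provided) and
 * turn it into a supported-hardware mask so that dev_pm_opp_set_supported_hw()
 * only enables the OPPs valid for this particular bin.
 */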
1533 static void check_speed_bin(struct device *dev)
1534 {
1535 	struct nvmem_cell *cell;
1536 	u32 val;
1537 
1538 	/*
1539 	 * If the OPP table specifies an opp-supported-hw property then we have
1540 	 * to set something with dev_pm_opp_set_supported_hw() or the table
1541 	 * doesn't get populated so pick an arbitrary value that should
1542 	 * ensure the default frequencies are selected but not conflict with any
1543 	 * actual bins
1544 	 */
1545 	val = 0x80;
1546 
1547 	cell = nvmem_cell_get(dev, "speed_bin");
1548 
1549 	if (!IS_ERR(cell)) {
1550 		void *buf = nvmem_cell_read(cell, NULL);
1551 
1552 		if (!IS_ERR(buf)) {
1553 			u8 bin = *((u8 *) buf);
1554 
1555 			val = (1 << bin);
1556 			kfree(buf);
1557 		}
1558 
1559 		nvmem_cell_put(cell);
1560 	}
1561 
1562 	dev_pm_opp_set_supported_hw(dev, &val, 1);
1563 }
1564 
1565 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1566 {
1567 	struct msm_drm_private *priv = dev->dev_private;
1568 	struct platform_device *pdev = priv->gpu_pdev;
1569 	struct a5xx_gpu *a5xx_gpu = NULL;
1570 	struct adreno_gpu *adreno_gpu;
1571 	struct msm_gpu *gpu;
1572 	unsigned int nr_rings;
1573 	int ret;
1574 
1575 	if (!pdev) {
1576 		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1577 		return ERR_PTR(-ENXIO);
1578 	}
1579 
1580 	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1581 	if (!a5xx_gpu)
1582 		return ERR_PTR(-ENOMEM);
1583 
1584 	adreno_gpu = &a5xx_gpu->base;
1585 	gpu = &adreno_gpu->base;
1586 
1587 	adreno_gpu->registers = a5xx_registers;
1588 
1589 	a5xx_gpu->lm_leakage = 0x4E001A;
1590 
1591 	check_speed_bin(&pdev->dev);
1592 
1593 	nr_rings = 4;
1594 
1595 	if (adreno_is_a510(adreno_gpu))
1596 		nr_rings = 1;
1597 
1598 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, nr_rings);
1599 	if (ret) {
1600 		a5xx_destroy(&(a5xx_gpu->base.base));
1601 		return ERR_PTR(ret);
1602 	}
1603 
1604 	if (gpu->aspace)
1605 		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1606 
1607 	/* Set up the preemption specific bits and pieces for each ringbuffer */
1608 	a5xx_preempt_init(gpu);
1609 
1610 	return gpu;
1611 }
1612