1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
3
4
5 #include "msm_gem.h"
6 #include "msm_mmu.h"
7 #include "msm_gpu_trace.h"
8 #include "a6xx_gpu.h"
9 #include "a6xx_gmu.xml.h"
10
11 #include <linux/bitfield.h>
12 #include <linux/devfreq.h>
13 #include <linux/firmware/qcom/qcom_scm.h>
14 #include <linux/pm_domain.h>
15 #include <linux/soc/qcom/llcc-qcom.h>
16
17 #define GPU_PAS_ID 13
18
static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
20 {
21 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
22 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
23
24 /* Check that the GMU is idle */
25 if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_isidle(&a6xx_gpu->gmu))
26 return false;
27
	/* Check that the CX master is idle */
29 if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
30 ~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
31 return false;
32
33 return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
34 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
35 }
36
static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
38 {
39 /* wait for CP to drain ringbuffer: */
40 if (!adreno_idle(gpu, ring))
41 return false;
42
43 if (spin_until(_a6xx_check_idle(gpu))) {
44 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
45 gpu->name, __builtin_return_address(0),
46 gpu_read(gpu, REG_A6XX_RBBM_STATUS),
47 gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
48 gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
49 gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
50 return false;
51 }
52
53 return true;
54 }
55
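/*
 * Ask the CP firmware to report its current ringbuffer read position.
 * CP_WHERE_AM_I makes it write the RPTR into the shadow buffer allocated in
 * a6xx_ucode_load(), so the kernel can observe ring progress without register
 * reads.  Targets with expanded APRIV use the hardware-maintained RPTR shadow
 * instead and skip the packet.
 */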
static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
57 {
58 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
59 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
60
61 /* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
62 if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
63 OUT_PKT7(ring, CP_WHERE_AM_I, 2);
64 OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
65 OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
66 }
67 }
68
static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
70 {
71 uint32_t wptr;
72 unsigned long flags;
73
74 update_shadow_rptr(gpu, ring);
75
76 spin_lock_irqsave(&ring->preempt_lock, flags);
77
78 /* Copy the shadow to the actual register */
79 ring->cur = ring->next;
80
81 /* Make sure to wrap wptr if we need to */
82 wptr = get_wptr(ring);
83
84 spin_unlock_irqrestore(&ring->preempt_lock, flags);
85
86 /* Make sure everything is posted before making a decision */
87 mb();
88
89 gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
90 }
91
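/*
 * Ask the CP to copy a 64-bit counter (two consecutive registers) into the
 * per-ring stats area, so begin/end samples for a submit can be compared.
 */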
static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
		u64 iova)
94 {
95 OUT_PKT7(ring, CP_REG_TO_MEM, 3);
96 OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
97 CP_REG_TO_MEM_0_CNT(2) |
98 CP_REG_TO_MEM_0_64B);
99 OUT_RING(ring, lower_32_bits(iova));
100 OUT_RING(ring, upper_32_bits(iova));
101 }
102
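/*
 * Emit the packets that switch the GPU onto the submitting process' private
 * pagetable.  The switch is skipped when the incoming context is already the
 * current one on the GPU.
 */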
static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
		struct msm_ringbuffer *ring, struct msm_gem_submit *submit)
105 {
106 bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
107 struct msm_file_private *ctx = submit->queue->ctx;
108 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
109 phys_addr_t ttbr;
110 u32 asid;
111 u64 memptr = rbmemptr(ring, ttbr0);
112
113 if (ctx->seqno == a6xx_gpu->base.base.cur_ctx_seqno)
114 return;
115
116 if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
117 return;
118
119 if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) {
120 /* Wait for previous submit to complete before continuing: */
121 OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
122 OUT_RING(ring, 0);
123 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
124 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
125 OUT_RING(ring, submit->seqno - 1);
126
127 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
128 OUT_RING(ring, CP_SET_THREAD_BOTH);
129
130 /* Reset state used to synchronize BR and BV */
131 OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1);
132 OUT_RING(ring,
133 CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS |
134 CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE |
135 CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER |
136 CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS);
137
138 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
139 OUT_RING(ring, CP_SET_THREAD_BR);
140 }
141
142 if (!sysprof) {
143 if (!adreno_is_a7xx(adreno_gpu)) {
144 /* Turn off protected mode to write to special registers */
145 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
146 OUT_RING(ring, 0);
147 }
148
149 OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
150 OUT_RING(ring, 1);
151 }
152
153 /* Execute the table update */
154 OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
155 OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));
156
157 OUT_RING(ring,
158 CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
159 CP_SMMU_TABLE_UPDATE_1_ASID(asid));
160 OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
161 OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));
162
163 /*
164 * Write the new TTBR0 to the memstore. This is good for debugging.
165 */
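	/* The high dword packs the ASID into bits 31:16 above the upper TTBR0 bits */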
166 OUT_PKT7(ring, CP_MEM_WRITE, 4);
167 OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
168 OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
169 OUT_RING(ring, lower_32_bits(ttbr));
170 OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr));
171
172 /*
173 * Sync both threads after switching pagetables and enable BR only
174 * to make sure BV doesn't race ahead while BR is still switching
175 * pagetables.
176 */
177 if (adreno_is_a7xx(&a6xx_gpu->base)) {
178 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
179 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
180 }
181
182 /*
183 * And finally, trigger a uche flush to be sure there isn't anything
184 * lingering in that part of the GPU
185 */
186
187 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
188 OUT_RING(ring, CACHE_INVALIDATE);
189
190 if (!sysprof) {
191 /*
192 * Wait for SRAM clear after the pgtable update, so the
193 * two can happen in parallel:
194 */
195 OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
196 OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
197 OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
198 REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
199 OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0));
200 OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
201 OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
202 OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
203
204 if (!adreno_is_a7xx(adreno_gpu)) {
205 /* Re-enable protected mode: */
206 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
207 OUT_RING(ring, 1);
208 }
209 }
210 }
211
static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
213 {
214 unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
215 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
216 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
217 struct msm_ringbuffer *ring = submit->ring;
218 unsigned int i, ibs = 0;
219
220 a6xx_set_pagetable(a6xx_gpu, ring, submit);
221
222 get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
223 rbmemptr_stats(ring, index, cpcycles_start));
224
225 /*
226 * For PM4 the GMU register offsets are calculated from the base of the
227 * GPU registers so we need to add 0x1a800 to the register value on A630
228 * to get the right value from PM4.
229 */
230 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
231 rbmemptr_stats(ring, index, alwayson_start));
232
233 /* Invalidate CCU depth and color */
234 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
235 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));
236
237 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
238 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));
239
240 /* Submit the commands */
241 for (i = 0; i < submit->nr_cmds; i++) {
242 switch (submit->cmd[i].type) {
243 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
244 break;
245 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
246 if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
247 break;
248 fallthrough;
249 case MSM_SUBMIT_CMD_BUF:
250 OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
251 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
252 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
253 OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
254 ibs++;
255 break;
256 }
257
258 /*
259 * Periodically update shadow-wptr if needed, so that we
260 * can see partial progress of submits with large # of
261 * cmds.. otherwise we could needlessly stall waiting for
262 * ringbuffer state, simply due to looking at a shadow
263 * rptr value that has not been updated
264 */
265 if ((ibs % 32) == 0)
266 update_shadow_rptr(gpu, ring);
267 }
268
269 get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
270 rbmemptr_stats(ring, index, cpcycles_end));
271 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
272 rbmemptr_stats(ring, index, alwayson_end));
273
274 /* Write the fence to the scratch register */
275 OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
276 OUT_RING(ring, submit->seqno);
277
278 /*
279 * Execute a CACHE_FLUSH_TS event. This will ensure that the
280 * timestamp is written to the memory and then triggers the interrupt
281 */
282 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
283 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
284 CP_EVENT_WRITE_0_IRQ);
285 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
286 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
287 OUT_RING(ring, submit->seqno);
288
289 trace_msm_gpu_submit_flush(submit,
290 gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));
291
292 a6xx_flush(gpu, ring);
293 }
294
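/*
 * A7xx submit path: same overall flow as a6xx_submit(), but it additionally
 * has to fence the BR and BV pipes around the pagetable switch and write a
 * separate BV timestamp so concurrent binning cannot race ahead of BR.
 */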
static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
296 {
297 unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
298 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
299 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
300 struct msm_ringbuffer *ring = submit->ring;
301 unsigned int i, ibs = 0;
302
303 /*
304 * Toggle concurrent binning for pagetable switch and set the thread to
305 * BR since only it can execute the pagetable switch packets.
306 */
307 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
308 OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
309
310 a6xx_set_pagetable(a6xx_gpu, ring, submit);
311
312 get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
313 rbmemptr_stats(ring, index, cpcycles_start));
314 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
315 rbmemptr_stats(ring, index, alwayson_start));
316
317 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
318 OUT_RING(ring, CP_SET_THREAD_BOTH);
319
320 OUT_PKT7(ring, CP_SET_MARKER, 1);
321 OUT_RING(ring, 0x101); /* IFPC disable */
322
323 OUT_PKT7(ring, CP_SET_MARKER, 1);
324 OUT_RING(ring, 0x00d); /* IB1LIST start */
325
326 /* Submit the commands */
327 for (i = 0; i < submit->nr_cmds; i++) {
328 switch (submit->cmd[i].type) {
329 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
330 break;
331 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
332 if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
333 break;
334 fallthrough;
335 case MSM_SUBMIT_CMD_BUF:
336 OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
337 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
338 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
339 OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
340 ibs++;
341 break;
342 }
343
344 /*
345 * Periodically update shadow-wptr if needed, so that we
346 * can see partial progress of submits with large # of
347 * cmds.. otherwise we could needlessly stall waiting for
348 * ringbuffer state, simply due to looking at a shadow
349 * rptr value that has not been updated
350 */
351 if ((ibs % 32) == 0)
352 update_shadow_rptr(gpu, ring);
353 }
354
355 OUT_PKT7(ring, CP_SET_MARKER, 1);
356 OUT_RING(ring, 0x00e); /* IB1LIST end */
357
358 get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
359 rbmemptr_stats(ring, index, cpcycles_end));
360 get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
361 rbmemptr_stats(ring, index, alwayson_end));
362
363 /* Write the fence to the scratch register */
364 OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
365 OUT_RING(ring, submit->seqno);
366
367 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
368 OUT_RING(ring, CP_SET_THREAD_BR);
369
370 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
371 OUT_RING(ring, CCU_INVALIDATE_DEPTH);
372
373 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
374 OUT_RING(ring, CCU_INVALIDATE_COLOR);
375
376 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
377 OUT_RING(ring, CP_SET_THREAD_BV);
378
379 /*
380 * Make sure the timestamp is committed once BV pipe is
381 * completely done with this submission.
382 */
383 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
384 OUT_RING(ring, CACHE_CLEAN | BIT(27));
385 OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
386 OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
387 OUT_RING(ring, submit->seqno);
388
389 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
390 OUT_RING(ring, CP_SET_THREAD_BR);
391
392 /*
393 * This makes sure that BR doesn't race ahead and commit
394 * timestamp to memstore while BV is still processing
395 * this submission.
396 */
397 OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
398 OUT_RING(ring, 0);
399 OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
400 OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
401 OUT_RING(ring, submit->seqno);
402
403 /* write the ringbuffer timestamp */
404 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
405 OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27));
406 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
407 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
408 OUT_RING(ring, submit->seqno);
409
410 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
411 OUT_RING(ring, CP_SET_THREAD_BOTH);
412
413 OUT_PKT7(ring, CP_SET_MARKER, 1);
414 OUT_RING(ring, 0x100); /* IFPC enable */
415
416 trace_msm_gpu_submit_flush(submit,
417 gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));
418
419 a6xx_flush(gpu, ring);
420 }
421
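/*
 * Enable or disable hardware clock gating.  The GMU-side CGC mode/delay/
 * hysteresis registers are programmed first, then the per-block RBBM clock
 * control registers from the per-GPU hwcg table (or, on targets without a
 * table, via the global CGC load command).
 */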
static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
423 {
424 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
425 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
426 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
427 const struct adreno_reglist *reg;
428 unsigned int i;
429 u32 cgc_delay, cgc_hyst;
430 u32 val, clock_cntl_on;
431
432 if (!(adreno_gpu->info->a6xx->hwcg || adreno_is_a7xx(adreno_gpu)))
433 return;
434
435 if (adreno_is_a630(adreno_gpu))
436 clock_cntl_on = 0x8aa8aa02;
437 else if (adreno_is_a610(adreno_gpu))
438 clock_cntl_on = 0xaaa8aa82;
439 else if (adreno_is_a702(adreno_gpu))
440 clock_cntl_on = 0xaaaaaa82;
441 else
442 clock_cntl_on = 0x8aa8aa82;
443
444 cgc_delay = adreno_is_a615_family(adreno_gpu) ? 0x111 : 0x10111;
445 cgc_hyst = adreno_is_a615_family(adreno_gpu) ? 0x555 : 0x5555;
446
447 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
448 state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
449 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
450 state ? cgc_delay : 0);
451 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
452 state ? cgc_hyst : 0);
453
454 if (!adreno_gpu->info->a6xx->hwcg) {
455 gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
456 gpu_write(gpu, REG_A7XX_RBBM_CGC_GLOBAL_LOAD_CMD, state ? 1 : 0);
457
458 if (state) {
459 gpu_write(gpu, REG_A7XX_RBBM_CGC_P2S_TRIG_CMD, 1);
460
461 if (gpu_poll_timeout(gpu, REG_A7XX_RBBM_CGC_P2S_STATUS, val,
462 val & A7XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
463 dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
464 return;
465 }
466
467 gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
468 }
469
470 return;
471 }
472
473 val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);
474
475 /* Don't re-program the registers if they are already correct */
476 if ((!state && !val) || (state && (val == clock_cntl_on)))
477 return;
478
479 /* Disable SP clock before programming HWCG registers */
480 if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
481 gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
482
483 for (i = 0; (reg = &adreno_gpu->info->a6xx->hwcg[i], reg->offset); i++)
484 gpu_write(gpu, reg->offset, state ? reg->value : 0);
485
486 /* Enable SP clock */
487 if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
488 gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
489
490 gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
491 }
492
static void a6xx_set_cp_protect(struct msm_gpu *gpu)
494 {
495 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
496 const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
497 unsigned i;
498
499 /*
500 * Enable access protection to privileged registers, fault on an access
501 * protect violation and select the last span to protect from the start
502 * address all the way to the end of the register address space
503 */
504 gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL,
505 A6XX_CP_PROTECT_CNTL_ACCESS_PROT_EN |
506 A6XX_CP_PROTECT_CNTL_ACCESS_FAULT_ON_VIOL_EN |
507 A6XX_CP_PROTECT_CNTL_LAST_SPAN_INF_RANGE);
508
509 for (i = 0; i < protect->count - 1; i++) {
510 /* Intentionally skip writing to some registers */
511 if (protect->regs[i])
512 gpu_write(gpu, REG_A6XX_CP_PROTECT(i), protect->regs[i]);
513 }
514 /* last CP_PROTECT to have "infinite" length on the last entry */
515 gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]);
516 }
517
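/*
 * Derive the UBWC (bandwidth compression) settings for this GPU.  These
 * values, in particular highest_bank_bit, need to stay consistent with what
 * userspace assumes for shared compressed surfaces.
 */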
static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
519 {
520 gpu->ubwc_config.rgb565_predicator = 0;
521 gpu->ubwc_config.uavflagprd_inv = 0;
522 gpu->ubwc_config.min_acc_len = 0;
523 gpu->ubwc_config.ubwc_swizzle = 0x6;
524 gpu->ubwc_config.macrotile_mode = 0;
525 gpu->ubwc_config.highest_bank_bit = 15;
526
527 if (adreno_is_a610(gpu)) {
528 gpu->ubwc_config.highest_bank_bit = 13;
529 gpu->ubwc_config.min_acc_len = 1;
530 gpu->ubwc_config.ubwc_swizzle = 0x7;
531 }
532
533 if (adreno_is_a618(gpu))
534 gpu->ubwc_config.highest_bank_bit = 14;
535
536 if (adreno_is_a619(gpu))
537 /* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
538 gpu->ubwc_config.highest_bank_bit = 13;
539
540 if (adreno_is_a619_holi(gpu))
541 gpu->ubwc_config.highest_bank_bit = 13;
542
543 if (adreno_is_a621(gpu)) {
544 gpu->ubwc_config.highest_bank_bit = 13;
545 gpu->ubwc_config.amsbc = 1;
546 gpu->ubwc_config.uavflagprd_inv = 2;
547 }
548
549 if (adreno_is_a640_family(gpu))
550 gpu->ubwc_config.amsbc = 1;
551
552 if (adreno_is_a680(gpu))
553 gpu->ubwc_config.macrotile_mode = 1;
554
555 if (adreno_is_a650(gpu) ||
556 adreno_is_a660(gpu) ||
557 adreno_is_a690(gpu) ||
558 adreno_is_a730(gpu) ||
559 adreno_is_a740_family(gpu)) {
560 /* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
561 gpu->ubwc_config.highest_bank_bit = 16;
562 gpu->ubwc_config.amsbc = 1;
563 gpu->ubwc_config.rgb565_predicator = 1;
564 gpu->ubwc_config.uavflagprd_inv = 2;
565 gpu->ubwc_config.macrotile_mode = 1;
566 }
567
568 if (adreno_is_7c3(gpu)) {
569 gpu->ubwc_config.highest_bank_bit = 14;
570 gpu->ubwc_config.amsbc = 1;
571 gpu->ubwc_config.uavflagprd_inv = 2;
572 gpu->ubwc_config.macrotile_mode = 1;
573 }
574
575 if (adreno_is_a702(gpu)) {
576 gpu->ubwc_config.highest_bank_bit = 14;
577 gpu->ubwc_config.min_acc_len = 1;
578 }
579 }
580
static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
582 {
583 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
584 /*
585 * We subtract 13 from the highest bank bit (13 is the minimum value
586 * allowed by hw) and write the lowest two bits of the remaining value
587 * as hbb_lo and the one above it as hbb_hi to the hardware.
588 */
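	/* For example, highest_bank_bit == 15 gives hbb = 2, so hbb_hi = 0 and hbb_lo = 2 */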
589 BUG_ON(adreno_gpu->ubwc_config.highest_bank_bit < 13);
590 u32 hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13;
591 u32 hbb_hi = hbb >> 2;
592 u32 hbb_lo = hbb & 3;
593 u32 ubwc_mode = adreno_gpu->ubwc_config.ubwc_swizzle & 1;
594 u32 level2_swizzling_dis = !(adreno_gpu->ubwc_config.ubwc_swizzle & 2);
595
596 gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
597 level2_swizzling_dis << 12 |
598 adreno_gpu->ubwc_config.rgb565_predicator << 11 |
599 hbb_hi << 10 | adreno_gpu->ubwc_config.amsbc << 4 |
600 adreno_gpu->ubwc_config.min_acc_len << 3 |
601 hbb_lo << 1 | ubwc_mode);
602
603 gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
604 level2_swizzling_dis << 6 | hbb_hi << 4 |
605 adreno_gpu->ubwc_config.min_acc_len << 3 |
606 hbb_lo << 1 | ubwc_mode);
607
608 gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
609 level2_swizzling_dis << 12 | hbb_hi << 10 |
610 adreno_gpu->ubwc_config.uavflagprd_inv << 4 |
611 adreno_gpu->ubwc_config.min_acc_len << 3 |
612 hbb_lo << 1 | ubwc_mode);
613
614 if (adreno_is_a7xx(adreno_gpu))
615 gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL,
616 FIELD_PREP(GENMASK(8, 5), hbb_lo));
617
618 gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL,
619 adreno_gpu->ubwc_config.min_acc_len << 23 | hbb_lo << 21);
620
621 gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL,
622 adreno_gpu->ubwc_config.macrotile_mode);
623 }
624
static int a6xx_cp_init(struct msm_gpu *gpu)
626 {
627 struct msm_ringbuffer *ring = gpu->rb[0];
628
629 OUT_PKT7(ring, CP_ME_INIT, 8);
630
631 OUT_RING(ring, 0x0000002f);
632
633 /* Enable multiple hardware contexts */
634 OUT_RING(ring, 0x00000003);
635
636 /* Enable error detection */
637 OUT_RING(ring, 0x20000000);
638
639 /* Don't enable header dump */
640 OUT_RING(ring, 0x00000000);
641 OUT_RING(ring, 0x00000000);
642
643 /* No workarounds enabled */
644 OUT_RING(ring, 0x00000000);
645
646 /* Pad rest of the cmds with 0's */
647 OUT_RING(ring, 0x00000000);
648 OUT_RING(ring, 0x00000000);
649
650 a6xx_flush(gpu, ring);
651 return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
652 }
653
static int a7xx_cp_init(struct msm_gpu *gpu)
655 {
656 struct msm_ringbuffer *ring = gpu->rb[0];
657 u32 mask;
658
659 /* Disable concurrent binning before sending CP init */
660 OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
661 OUT_RING(ring, BIT(27));
662
663 OUT_PKT7(ring, CP_ME_INIT, 7);
664
665 /* Use multiple HW contexts */
666 mask = BIT(0);
667
668 /* Enable error detection */
669 mask |= BIT(1);
670
671 /* Set default reset state */
672 mask |= BIT(3);
673
674 /* Disable save/restore of performance counters across preemption */
675 mask |= BIT(6);
676
677 /* Enable the register init list with the spinlock */
678 mask |= BIT(8);
679
680 OUT_RING(ring, mask);
681
682 /* Enable multiple hardware contexts */
683 OUT_RING(ring, 0x00000003);
684
685 /* Enable error detection */
686 OUT_RING(ring, 0x20000000);
687
688 /* Operation mode mask */
689 OUT_RING(ring, 0x00000002);
690
691 /* *Don't* send a power up reg list for concurrent binning (TODO) */
692 /* Lo address */
693 OUT_RING(ring, 0x00000000);
694 /* Hi address */
695 OUT_RING(ring, 0x00000000);
696 /* BIT(31) set => read the regs from the list */
697 OUT_RING(ring, 0x00000000);
698
699 a6xx_flush(gpu, ring);
700 return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
701 }
702
703 /*
704 * Check that the microcode version is new enough to include several key
705 * security fixes. Return true if the ucode is safe.
706 */
static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
		struct drm_gem_object *obj)
709 {
710 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
711 struct msm_gpu *gpu = &adreno_gpu->base;
712 const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
713 u32 *buf = msm_gem_get_vaddr(obj);
714 bool ret = false;
715
716 if (IS_ERR(buf))
717 return false;
718
719 /* A7xx is safe! */
720 if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu))
721 return true;
722
723 /*
724 * Targets up to a640 (a618, a630 and a640) need to check for a
725 * microcode version that is patched to support the whereami opcode or
726 * one that is new enough to include it by default.
727 *
728 * a650 tier targets don't need whereami but still need to be
729 * equal to or newer than 0.95 for other security fixes
730 *
731 * a660 targets have all the critical security fixes from the start
732 */
733 if (!strcmp(sqe_name, "a630_sqe.fw")) {
734 /*
735 * If the lowest nibble is 0xa that is an indication that this
736 * microcode has been patched. The actual version is in dword
737 * [3] but we only care about the patchlevel which is the lowest
738 * nibble of dword [3]
739 *
740 * Otherwise check that the firmware is greater than or equal
741 * to 1.90 which was the first version that had this fix built
742 * in
743 */
744 if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
745 (buf[0] & 0xfff) >= 0x190) {
746 a6xx_gpu->has_whereami = true;
747 ret = true;
748 goto out;
749 }
750
751 DRM_DEV_ERROR(&gpu->pdev->dev,
752 "a630 SQE ucode is too old. Have version %x need at least %x\n",
753 buf[0] & 0xfff, 0x190);
754 } else if (!strcmp(sqe_name, "a650_sqe.fw")) {
755 if ((buf[0] & 0xfff) >= 0x095) {
756 ret = true;
757 goto out;
758 }
759
760 DRM_DEV_ERROR(&gpu->pdev->dev,
761 "a650 SQE ucode is too old. Have version %x need at least %x\n",
762 buf[0] & 0xfff, 0x095);
763 } else if (!strcmp(sqe_name, "a660_sqe.fw")) {
764 ret = true;
765 } else {
766 DRM_DEV_ERROR(&gpu->pdev->dev,
767 "unknown GPU, add it to a6xx_ucode_check_version()!!\n");
768 }
769 out:
770 msm_gem_put_vaddr(obj);
771 return ret;
772 }
773
static int a6xx_ucode_load(struct msm_gpu *gpu)
775 {
776 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
777 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
778
779 if (!a6xx_gpu->sqe_bo) {
780 a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
781 adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);
782
783 if (IS_ERR(a6xx_gpu->sqe_bo)) {
784 int ret = PTR_ERR(a6xx_gpu->sqe_bo);
785
786 a6xx_gpu->sqe_bo = NULL;
787 DRM_DEV_ERROR(&gpu->pdev->dev,
788 "Could not allocate SQE ucode: %d\n", ret);
789
790 return ret;
791 }
792
793 msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
794 if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
795 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
796 drm_gem_object_put(a6xx_gpu->sqe_bo);
797
798 a6xx_gpu->sqe_bo = NULL;
799 return -EPERM;
800 }
801 }
802
803 /*
804 * Expanded APRIV and targets that support WHERE_AM_I both need a
805 * privileged buffer to store the RPTR shadow
806 */
807 if ((adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) &&
808 !a6xx_gpu->shadow_bo) {
809 a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
810 sizeof(u32) * gpu->nr_rings,
811 MSM_BO_WC | MSM_BO_MAP_PRIV,
812 gpu->aspace, &a6xx_gpu->shadow_bo,
813 &a6xx_gpu->shadow_iova);
814
815 if (IS_ERR(a6xx_gpu->shadow))
816 return PTR_ERR(a6xx_gpu->shadow);
817
818 msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
819 }
820
821 return 0;
822 }
823
static int a6xx_zap_shader_init(struct msm_gpu *gpu)
825 {
826 static bool loaded;
827 int ret;
828
829 if (loaded)
830 return 0;
831
832 ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
833
834 loaded = !ret;
835 return ret;
836 }
837
838 #define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
839 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
840 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
841 A6XX_RBBM_INT_0_MASK_CP_IB2 | \
842 A6XX_RBBM_INT_0_MASK_CP_IB1 | \
843 A6XX_RBBM_INT_0_MASK_CP_RB | \
844 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
845 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
846 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
847 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
848 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
849
850 #define A7XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
851 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
852 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
853 A6XX_RBBM_INT_0_MASK_CP_SW | \
854 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
855 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
856 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
857 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
858 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
859 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
860 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
861 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
862 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
863 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
864
865 #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \
866 A6XX_CP_APRIV_CNTL_RBFETCH | \
867 A6XX_CP_APRIV_CNTL_RBPRIVLEVEL | \
868 A6XX_CP_APRIV_CNTL_RBRPWB)
869
870 #define A7XX_BR_APRIVMASK (A7XX_APRIV_MASK | \
871 A6XX_CP_APRIV_CNTL_CDREAD | \
872 A6XX_CP_APRIV_CNTL_CDWRITE)
873
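/*
 * One-time hardware setup performed on every power-up and recovery.  Called
 * from a6xx_hw_init() with the GMU lock held.
 */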
static int hw_init(struct msm_gpu *gpu)
875 {
876 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
877 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
878 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
879 u64 gmem_range_min;
880 int ret;
881
882 if (!adreno_has_gmu_wrapper(adreno_gpu)) {
883 /* Make sure the GMU keeps the GPU on while we set it up */
884 ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
885 if (ret)
886 return ret;
887 }
888
889 /* Clear GBIF halt in case GX domain was not collapsed */
890 if (adreno_is_a619_holi(adreno_gpu)) {
891 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
892 gpu_read(gpu, REG_A6XX_GBIF_HALT);
893
894 gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0);
895 gpu_read(gpu, REG_A6XX_RBBM_GPR0_CNTL);
896 } else if (a6xx_has_gbif(adreno_gpu)) {
897 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
898 gpu_read(gpu, REG_A6XX_GBIF_HALT);
899
900 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
901 gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT);
902 }
903
904 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
905
906 if (adreno_is_a619_holi(adreno_gpu))
907 a6xx_sptprac_enable(gmu);
908
	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
914 gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
915 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
916
917 if (!adreno_is_a7xx(adreno_gpu)) {
918 /* Turn on 64 bit addressing for all blocks */
919 gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
920 gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
921 gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
922 gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
923 gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
924 gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
925 gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
926 gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
927 gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
928 gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
929 gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
930 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
931 }
932
933 /* enable hardware clockgating */
934 a6xx_set_hwcg(gpu, true);
935
	/* VBIF/GBIF start */
937 if (adreno_is_a610_family(adreno_gpu) ||
938 adreno_is_a640_family(adreno_gpu) ||
939 adreno_is_a650_family(adreno_gpu) ||
940 adreno_is_a7xx(adreno_gpu)) {
941 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
942 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
943 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
944 gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
945 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL,
946 adreno_is_a7xx(adreno_gpu) ? 0x2120212 : 0x3);
947 } else {
948 gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
949 }
950
951 if (adreno_is_a630(adreno_gpu))
952 gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
953
954 if (adreno_is_a7xx(adreno_gpu))
955 gpu_write(gpu, REG_A6XX_UCHE_GBIF_GX_CONFIG, 0x10240e0);
956
957 /* Make all blocks contribute to the GPU BUSY perf counter */
958 gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
959
960 /* Disable L2 bypass in the UCHE */
961 if (adreno_is_a7xx(adreno_gpu)) {
962 gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu);
963 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu);
964 } else {
965 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, 0x0001ffffffffffc0llu);
966 gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu);
967 gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu);
968 }
969
970 if (!(adreno_is_a650_family(adreno_gpu) ||
971 adreno_is_a702(adreno_gpu) ||
972 adreno_is_a730(adreno_gpu))) {
973 gmem_range_min = adreno_is_a740_family(adreno_gpu) ? SZ_16M : SZ_1M;
974
		/* Set the GMEM VA range [gmem_range_min:gmem_range_min + gmem size - 1] */
976 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN, gmem_range_min);
977
978 gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX,
979 gmem_range_min + adreno_gpu->info->gmem - 1);
980 }
981
982 if (adreno_is_a7xx(adreno_gpu))
983 gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, BIT(23));
984 else {
985 gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
986 gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
987 }
988
989 if (adreno_is_a640_family(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) {
990 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
991 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
992 } else if (adreno_is_a610_family(adreno_gpu)) {
993 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060);
994 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16);
995 } else if (!adreno_is_a7xx(adreno_gpu)) {
996 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
997 gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
998 }
999
1000 if (adreno_is_a660_family(adreno_gpu))
1001 gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
1002
1003 /* Setting the mem pool size */
1004 if (adreno_is_a610(adreno_gpu)) {
1005 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48);
1006 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47);
1007 } else if (adreno_is_a702(adreno_gpu)) {
1008 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 64);
1009 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 63);
1010 } else if (!adreno_is_a7xx(adreno_gpu))
1011 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);
1012
1013
1014 /* Set the default primFifo threshold values */
1015 if (adreno_gpu->info->a6xx->prim_fifo_threshold)
1016 gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL,
1017 adreno_gpu->info->a6xx->prim_fifo_threshold);
1018
1019 /* Set the AHB default slave response to "ERROR" */
1020 gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);
1021
1022 /* Turn on performance counters */
1023 gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);
1024
1025 if (adreno_is_a7xx(adreno_gpu)) {
1026 /* Turn on the IFPC counter (countable 4 on XOCLK4) */
1027 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1,
1028 FIELD_PREP(GENMASK(7, 0), 0x4));
1029 }
1030
1031 /* Select CP0 to always count cycles */
1032 gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);
1033
1034 a6xx_set_ubwc_config(gpu);
1035
1036 /* Enable fault detection */
1037 if (adreno_is_a730(adreno_gpu) ||
1038 adreno_is_a740_family(adreno_gpu))
1039 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff);
1040 else if (adreno_is_a690(adreno_gpu))
1041 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff);
1042 else if (adreno_is_a619(adreno_gpu))
1043 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff);
1044 else if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
1045 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3ffff);
1046 else
1047 gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff);
1048
1049 gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1);
1050
1051 /* Set weights for bicubic filtering */
1052 if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) {
1053 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
1054 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
1055 0x3fe05ff4);
1056 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
1057 0x3fa0ebee);
1058 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
1059 0x3f5193ed);
1060 gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
1061 0x3f0243f0);
1062 }
1063
1064 /* Set up the CX GMU counter 0 to count busy ticks */
1065 gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);
1066
1067 /* Enable the power counter */
1068 gmu_rmw(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, BIT(5));
1069 gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
1070
1071 /* Protect registers from the CP */
1072 a6xx_set_cp_protect(gpu);
1073
1074 if (adreno_is_a660_family(adreno_gpu)) {
1075 if (adreno_is_a690(adreno_gpu))
1076 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801);
1077 else
1078 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
1079 gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
1080 } else if (adreno_is_a702(adreno_gpu)) {
1081 /* Something to do with the HLSQ cluster */
1082 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(24));
1083 }
1084
1085 if (adreno_is_a690(adreno_gpu))
1086 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90);
1087 /* Set dualQ + disable afull for A660 GPU */
1088 else if (adreno_is_a660(adreno_gpu))
1089 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
1090 else if (adreno_is_a7xx(adreno_gpu))
1091 gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG,
1092 FIELD_PREP(GENMASK(19, 16), 6) |
1093 FIELD_PREP(GENMASK(15, 12), 6) |
1094 FIELD_PREP(GENMASK(11, 8), 9) |
1095 BIT(3) | BIT(2) |
1096 FIELD_PREP(GENMASK(1, 0), 2));
1097
1098 /* Enable expanded apriv for targets that support it */
1099 if (gpu->hw_apriv) {
1100 if (adreno_is_a7xx(adreno_gpu)) {
1101 gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1102 A7XX_BR_APRIVMASK);
1103 gpu_write(gpu, REG_A7XX_CP_BV_APRIV_CNTL,
1104 A7XX_APRIV_MASK);
1105 gpu_write(gpu, REG_A7XX_CP_LPAC_APRIV_CNTL,
1106 A7XX_APRIV_MASK);
1107 } else
1108 gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1109 BIT(6) | BIT(5) | BIT(3) | BIT(2) | BIT(1));
1110 }
1111
1112 if (adreno_is_a750(adreno_gpu)) {
1113 /* Disable ubwc merged UFC request feature */
1114 gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(19), BIT(19));
1115
1116 /* Enable TP flaghint and other performance settings */
1117 gpu_write(gpu, REG_A6XX_TPL1_DBG_ECO_CNTL1, 0xc0700);
1118 } else if (adreno_is_a7xx(adreno_gpu)) {
1119 /* Disable non-ubwc read reqs from passing write reqs */
1120 gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(11), BIT(11));
1121 }
1122
1123 /* Enable interrupts */
1124 gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK,
1125 adreno_is_a7xx(adreno_gpu) ? A7XX_INT_MASK : A6XX_INT_MASK);
1126
1127 ret = adreno_hw_init(gpu);
1128 if (ret)
1129 goto out;
1130
1131 gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
1132
1133 /* Set the ringbuffer address */
1134 gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
1135
1136 /* Targets that support extended APRIV can use the RPTR shadow from
1137 * hardware but all the other ones need to disable the feature. Targets
1138 * that support the WHERE_AM_I opcode can use that instead
1139 */
1140 if (adreno_gpu->base.hw_apriv)
1141 gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
1142 else
1143 gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
1144 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
1145
1146 /* Configure the RPTR shadow if needed: */
1147 if (a6xx_gpu->shadow_bo) {
1148 gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR,
1149 shadowptr(a6xx_gpu, gpu->rb[0]));
1150 }
1151
1152 /* ..which means "always" on A7xx, also for BV shadow */
1153 if (adreno_is_a7xx(adreno_gpu)) {
1154 gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR,
1155 rbmemptr(gpu->rb[0], bv_fence));
1156 }
1157
1158 /* Always come up on rb 0 */
1159 a6xx_gpu->cur_ring = gpu->rb[0];
1160
1161 gpu->cur_ctx_seqno = 0;
1162
	/* Enable the SQE to start the CP engine */
1164 gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);
1165
1166 ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu);
1167 if (ret)
1168 goto out;
1169
	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
1177 ret = a6xx_zap_shader_init(gpu);
1178 if (!ret) {
1179 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
1180 OUT_RING(gpu->rb[0], 0x00000000);
1181
1182 a6xx_flush(gpu, gpu->rb[0]);
1183 if (!a6xx_idle(gpu, gpu->rb[0]))
1184 return -EINVAL;
1185 } else if (ret == -ENODEV) {
1186 /*
1187 * This device does not use zap shader (but print a warning
1188 * just in case someone got their dt wrong.. hopefully they
1189 * have a debug UART to realize the error of their ways...
1190 * if you mess this up you are about to crash horribly)
1191 */
1192 dev_warn_once(gpu->dev->dev,
1193 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
1194 gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
1195 ret = 0;
1196 } else {
1197 return ret;
1198 }
1199
1200 out:
1201 if (adreno_has_gmu_wrapper(adreno_gpu))
1202 return ret;
1203 /*
1204 * Tell the GMU that we are done touching the GPU and it can start power
1205 * management
1206 */
1207 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1208
1209 if (a6xx_gpu->gmu.legacy) {
1210 /* Take the GMU out of its special boot mode */
1211 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
1212 }
1213
1214 return ret;
1215 }
1216
static int a6xx_hw_init(struct msm_gpu *gpu)
1218 {
1219 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1220 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1221 int ret;
1222
1223 mutex_lock(&a6xx_gpu->gmu.lock);
1224 ret = hw_init(gpu);
1225 mutex_unlock(&a6xx_gpu->gmu.lock);
1226
1227 return ret;
1228 }
1229
static void a6xx_dump(struct msm_gpu *gpu)
1231 {
1232 DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n",
1233 gpu_read(gpu, REG_A6XX_RBBM_STATUS));
1234 adreno_dump(gpu);
1235 }
1236
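/*
 * GPU hang recovery: halt the SQE, drop the runtime PM references so the CX
 * power domain can collapse (using a genpd notifier to confirm it did), then
 * power back up and re-run hw_init() via msm_gpu_hw_init().
 */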
static void a6xx_recover(struct msm_gpu *gpu)
1238 {
1239 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1240 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1241 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1242 int i, active_submits;
1243
1244 adreno_dump_info(gpu);
1245
1246 for (i = 0; i < 8; i++)
1247 DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
1248 gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));
1249
1250 if (hang_debug)
1251 a6xx_dump(gpu);
1252
	/*
	 * Flag that we are hung so that recovery-specific handling runs during
	 * the rpm suspend we are about to trigger
	 */
1257 a6xx_gpu->hung = true;
1258
1259 /* Halt SQE first */
1260 gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
1261
1262 pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);
1263
	/* active_submits won't change until we make a submission */
1265 mutex_lock(&gpu->active_lock);
1266 active_submits = gpu->active_submits;
1267
1268 /*
1269 * Temporarily clear active_submits count to silence a WARN() in the
1270 * runtime suspend cb
1271 */
1272 gpu->active_submits = 0;
1273
1274 if (adreno_has_gmu_wrapper(adreno_gpu)) {
1275 /* Drain the outstanding traffic on memory buses */
1276 a6xx_bus_clear_pending_transactions(adreno_gpu, true);
1277
1278 /* Reset the GPU to a clean state */
1279 a6xx_gpu_sw_reset(gpu, true);
1280 a6xx_gpu_sw_reset(gpu, false);
1281 }
1282
1283 reinit_completion(&gmu->pd_gate);
1284 dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
1285 dev_pm_genpd_synced_poweroff(gmu->cxpd);
1286
1287 /* Drop the rpm refcount from active submits */
1288 if (active_submits)
1289 pm_runtime_put(&gpu->pdev->dev);
1290
1291 /* And the final one from recover worker */
1292 pm_runtime_put_sync(&gpu->pdev->dev);
1293
1294 if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
1295 DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");
1296
1297 dev_pm_genpd_remove_notifier(gmu->cxpd);
1298
1299 pm_runtime_use_autosuspend(&gpu->pdev->dev);
1300
1301 if (active_submits)
1302 pm_runtime_get(&gpu->pdev->dev);
1303
1304 pm_runtime_get_sync(&gpu->pdev->dev);
1305
1306 gpu->active_submits = active_submits;
1307 mutex_unlock(&gpu->active_lock);
1308
1309 msm_gpu_hw_init(gpu);
1310 a6xx_gpu->hung = false;
1311 }
1312
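/*
 * Translate a UCHE fault into a client name using the mid value from FSYNR1
 * and the client id latched in UCHE_CLIENT_PF.  The encoding differs between
 * the a6xx, a660 and a7xx families.
 */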
static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
1314 {
1315 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1316 static const char *uche_clients[7] = {
1317 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
1318 };
1319 u32 val;
1320
1321 if (adreno_is_a7xx(adreno_gpu)) {
1322 if (mid != 1 && mid != 2 && mid != 3 && mid != 8)
1323 return "UNKNOWN";
1324 } else {
1325 if (mid < 1 || mid > 3)
1326 return "UNKNOWN";
1327 }
1328
	/*
	 * The source of the data depends on the mid ID read from FSYNR1
	 * and the client ID read from the UCHE block
	 */
1333 val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
1334
1335 if (adreno_is_a7xx(adreno_gpu)) {
1336 /* Bit 3 for mid=3 indicates BR or BV */
1337 static const char *uche_clients_a7xx[16] = {
1338 "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC",
1339 "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
1340 "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC",
1341 "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
1342 };
1343
1344 /* LPAC has the same clients as BR and BV, but because it is
1345 * compute-only some of them do not exist and there are holes
1346 * in the array.
1347 */
1348 static const char *uche_clients_lpac_a7xx[8] = {
1349 "-", "LPAC_SP", "-", "-",
1350 "LPAC_HLSQ", "-", "-", "LPAC_TP",
1351 };
1352
1353 val &= GENMASK(6, 0);
1354
1355 /* mid=3 refers to BR or BV */
1356 if (mid == 3) {
1357 if (val < ARRAY_SIZE(uche_clients_a7xx))
1358 return uche_clients_a7xx[val];
1359 else
1360 return "UCHE";
1361 }
1362
1363 /* mid=8 refers to LPAC */
1364 if (mid == 8) {
1365 if (val < ARRAY_SIZE(uche_clients_lpac_a7xx))
1366 return uche_clients_lpac_a7xx[val];
1367 else
1368 return "UCHE_LPAC";
1369 }
1370
1371 /* mid=2 is a catchall for everything else in LPAC */
1372 if (mid == 2)
1373 return "UCHE_LPAC";
1374
1375 /* mid=1 is a catchall for everything else in BR/BV */
1376 return "UCHE";
1377 } else if (adreno_is_a660_family(adreno_gpu)) {
1378 static const char *uche_clients_a660[8] = {
1379 "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", "TP",
1380 };
1381
1382 static const char *uche_clients_a660_not[8] = {
1383 "not VFD", "not SP", "not VSC", "not VPC",
1384 "not HLSQ", "not PC", "not LRZ", "not TP",
1385 };
1386
1387 val &= GENMASK(6, 0);
1388
1389 if (mid == 3 && val < ARRAY_SIZE(uche_clients_a660))
1390 return uche_clients_a660[val];
1391
1392 if (mid == 1 && val < ARRAY_SIZE(uche_clients_a660_not))
1393 return uche_clients_a660_not[val];
1394
1395 return "UCHE";
1396 } else {
1397 /* mid = 3 is most precise and refers to only one block per client */
1398 if (mid == 3)
1399 return uche_clients[val & 7];
1400
1401 /* For mid=2 the source is TP or VFD except when the client id is 0 */
1402 if (mid == 2)
1403 return ((val & 7) == 0) ? "TP" : "TP|VFD";
1404
1405 /* For mid=1 just return "UCHE" as a catchall for everything else */
1406 return "UCHE";
1407 }
1408 }
1409
static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
1411 {
1412 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1413
1414 if (id == 0)
1415 return "CP";
1416 else if (id == 4)
1417 return "CCU";
1418 else if (id == 6)
1419 return "CDP Prefetch";
1420 else if (id == 7)
1421 return "GMU";
1422 else if (id == 5 && adreno_is_a7xx(adreno_gpu))
1423 return "Flag cache";
1424
1425 return a6xx_uche_fault_block(gpu, id);
1426 }
1427
static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1429 {
1430 struct msm_gpu *gpu = arg;
1431 struct adreno_smmu_fault_info *info = data;
1432 const char *block = "unknown";
1433
1434 u32 scratch[] = {
1435 gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
1436 gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
1437 gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
1438 gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)),
1439 };
1440
1441 if (info)
1442 block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);
1443
1444 return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
1445 }
1446
static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
1448 {
1449 u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS);
1450
1451 if (status & A6XX_CP_INT_CP_OPCODE_ERROR) {
1452 u32 val;
1453
1454 gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1);
1455 val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA);
1456 dev_err_ratelimited(&gpu->pdev->dev,
1457 "CP | opcode error | possible opcode=0x%8.8X\n",
1458 val);
1459 }
1460
1461 if (status & A6XX_CP_INT_CP_UCODE_ERROR)
1462 dev_err_ratelimited(&gpu->pdev->dev,
1463 "CP ucode error interrupt\n");
1464
1465 if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR)
1466 dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n",
1467 gpu_read(gpu, REG_A6XX_CP_HW_FAULT));
1468
1469 if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1470 u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS);
1471
1472 dev_err_ratelimited(&gpu->pdev->dev,
1473 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1474 val & (1 << 20) ? "READ" : "WRITE",
1475 (val & 0x3ffff), val);
1476 }
1477
1478 if (status & A6XX_CP_INT_CP_AHB_ERROR && !adreno_is_a7xx(to_adreno_gpu(gpu)))
1479 dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n");
1480
1481 if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR)
1482 dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n");
1483
1484 if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR)
1485 dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n");
1486
1487 }
1488
static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
1490 {
1491 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1492 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1493 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1494
1495 /*
1496 * If stalled on SMMU fault, we could trip the GPU's hang detection,
1497 * but the fault handler will trigger the devcore dump, and we want
1498 * to otherwise resume normally rather than killing the submit, so
1499 * just bail.
1500 */
1501 if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
1502 return;
1503
1504 /*
1505 * Force the GPU to stay on until after we finish
1506 * collecting information
1507 */
1508 if (!adreno_has_gmu_wrapper(adreno_gpu))
1509 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
1510
1511 DRM_DEV_ERROR(&gpu->pdev->dev,
1512 "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1513 ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1514 gpu_read(gpu, REG_A6XX_RBBM_STATUS),
1515 gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
1516 gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1517 gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1518 gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1519 gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1520 gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
1521
1522 /* Turn off the hangcheck timer to keep it from bothering us */
1523 del_timer(&gpu->hangcheck_timer);
1524
1525 kthread_queue_work(gpu->worker, &gpu->recover_work);
1526 }
1527
static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
1529 {
1530 u32 status;
1531
1532 status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS);
1533 gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0);
1534
1535 dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);
1536
1537 /*
1538 * Ignore FASTBLEND violations, because the HW will silently fall back
1539 * to legacy blending.
1540 */
1541 if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
1542 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
1543 del_timer(&gpu->hangcheck_timer);
1544
1545 kthread_queue_work(gpu->worker, &gpu->recover_work);
1546 }
1547 }
1548
static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
1550 {
1551 struct msm_drm_private *priv = gpu->dev->dev_private;
1552 u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
1553
1554 gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
1555
1556 if (priv->disable_err_irq)
1557 status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
1558
1559 if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
1560 a6xx_fault_detect_irq(gpu);
1561
1562 if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
1563 dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");
1564
1565 if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1566 a6xx_cp_hw_err_irq(gpu);
1567
1568 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
1569 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
1570
1571 if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1572 dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
1573
1574 if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1575 dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
1576
1577 if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
1578 a7xx_sw_fuse_violation_irq(gpu);
1579
1580 if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS)
1581 msm_gpu_retire(gpu);
1582
1583 return IRQ_HANDLED;
1584 }
1585
static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
1587 {
1588 llcc_slice_deactivate(a6xx_gpu->llc_slice);
1589 llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
1590 }
1591
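/*
 * Activate the system cache (LLC) slices for GPU traffic and GPU pagetable
 * walks, and program the slice ids (SCIDs) into the GPU-side cache control
 * registers, except on MMU-500 based targets where the XBL programs them.
 */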
static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1593 {
1594 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1595 struct msm_gpu *gpu = &adreno_gpu->base;
1596 u32 cntl1_regval = 0;
1597
1598 if (IS_ERR(a6xx_gpu->llc_mmio))
1599 return;
1600
1601 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1602 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1603
1604 gpu_scid &= 0x1f;
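		/* Replicate the 5-bit SCID into each client field of SYSTEM_CACHE_CNTL_1 */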
1605 cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
1606 (gpu_scid << 15) | (gpu_scid << 20);
1607
1608 /* On A660, the SCID programming for UCHE traffic is done in
1609 * A6XX_GBIF_SCACHE_CNTL0[14:10]
1610 */
1611 if (adreno_is_a660_family(adreno_gpu))
1612 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
1613 (1 << 8), (gpu_scid << 10) | (1 << 8));
1614 }
1615
1616 /*
1617 * For targets with a MMU500, activate the slice but don't program the
1618 * register. The XBL will take care of that.
1619 */
1620 if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
1621 if (!a6xx_gpu->have_mmu500) {
1622 u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
1623
1624 gpuhtw_scid &= 0x1f;
1625 cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
1626 }
1627 }
1628
1629 if (!cntl1_regval)
1630 return;
1631
1632 /*
1633 * Program the slice IDs for the various GPU blocks and GPU MMU
1634 * pagetables
1635 */
1636 if (!a6xx_gpu->have_mmu500) {
1637 a6xx_llc_write(a6xx_gpu,
1638 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
1639
1640 /*
1641 * Program cacheability overrides to not allocate cache
1642 * lines on a write miss
1643 */
1644 a6xx_llc_rmw(a6xx_gpu,
1645 REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
1646 return;
1647 }
1648
1649 gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval);
1650 }
1651
static void a7xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1653 {
1654 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1655 struct msm_gpu *gpu = &adreno_gpu->base;
1656
1657 if (IS_ERR(a6xx_gpu->llc_mmio))
1658 return;
1659
1660 if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1661 u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1662
1663 gpu_scid &= GENMASK(4, 0);
1664
1665 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1,
1666 FIELD_PREP(GENMASK(29, 25), gpu_scid) |
1667 FIELD_PREP(GENMASK(24, 20), gpu_scid) |
1668 FIELD_PREP(GENMASK(19, 15), gpu_scid) |
1669 FIELD_PREP(GENMASK(14, 10), gpu_scid) |
1670 FIELD_PREP(GENMASK(9, 5), gpu_scid) |
1671 FIELD_PREP(GENMASK(4, 0), gpu_scid));
1672
1673 gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0,
1674 FIELD_PREP(GENMASK(14, 10), gpu_scid) |
1675 BIT(8));
1676 }
1677
1678 llcc_slice_activate(a6xx_gpu->htw_llc_slice);
1679 }
1680
1681 static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
1682 {
1683 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
1684 if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
1685 return;
1686
1687 llcc_slice_putd(a6xx_gpu->llc_slice);
1688 llcc_slice_putd(a6xx_gpu->htw_llc_slice);
1689 }
1690
1691 static void a6xx_llc_slices_init(struct platform_device *pdev,
1692 struct a6xx_gpu *a6xx_gpu, bool is_a7xx)
1693 {
1694 struct device_node *phandle;
1695
1696 /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
1697 if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
1698 return;
1699
1700 /*
1701 * There is a different programming path for A6xx targets with an
1702 * mmu500 attached, so detect if that is the case
1703 */
1704 phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0);
1705 a6xx_gpu->have_mmu500 = (phandle &&
1706 of_device_is_compatible(phandle, "arm,mmu-500"));
1707 of_node_put(phandle);
1708
1709 if (is_a7xx || !a6xx_gpu->have_mmu500)
1710 a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem");
1711 else
1712 a6xx_gpu->llc_mmio = NULL;
1713
1714 a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
1715 a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
1716
1717 if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
1718 a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
1719 }
1720
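/*
 * a7xx-only CX setup: on a750, program the software fuses (through SCM,
 * or a direct register write when SCM is unavailable) and detect whether
 * ray tracing has been fused off; on a740 ray tracing is always present.
 */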
1721 static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu)
1722 {
1723 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1724 struct msm_gpu *gpu = &adreno_gpu->base;
1725 u32 fuse_val;
1726 int ret;
1727
1728 if (adreno_is_a750(adreno_gpu)) {
1729 /*
1730 * Assume that if qcom scm isn't available, whatever replaces it
1731 * allows us to write the fuse register ourselves. Users of
1732 * alternative firmware need to make sure this register is
1733 * writeable or somehow indicate that it isn't.
1734 * Print a warning because if you mess this up you're about to
1735 * crash horribly.
1736 */
1737 if (!qcom_scm_is_available()) {
1738 dev_warn_once(gpu->dev->dev,
1739 "SCM is not available, poking fuse register\n");
1740 a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE,
1741 A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
1742 A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND |
1743 A7XX_CX_MISC_SW_FUSE_VALUE_LPAC);
1744 adreno_gpu->has_ray_tracing = true;
1745 return 0;
1746 }
1747
1748 ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ |
1749 QCOM_SCM_GPU_TSENSE_EN_REQ);
1750 if (ret)
1751 return ret;
1752
1753 /*
1754 * On a750 raytracing may be disabled by the firmware, find out
1755 * whether that's the case. The scm call above sets the fuse
1756 * register.
1757 */
1758 fuse_val = a6xx_llc_read(a6xx_gpu,
1759 REG_A7XX_CX_MISC_SW_FUSE_VALUE);
1760 adreno_gpu->has_ray_tracing =
1761 !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING);
1762 } else if (adreno_is_a740(adreno_gpu)) {
1763 /* Raytracing is always enabled on a740 */
1764 adreno_gpu->has_ray_tracing = true;
1765 }
1766
1767 return 0;
1768 }
1769
1770
1771 #define GBIF_CLIENT_HALT_MASK BIT(0)
1772 #define GBIF_ARB_HALT_MASK BIT(1)
1773 #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0)
1774 #define VBIF_RESET_ACK_MASK 0xF0
1775 #define GPR0_GBIF_HALT_REQUEST 0x1E0
1776
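/*
 * Quiesce the GPU's memory interfaces (e.g. before power collapse, as in
 * a6xx_pm_suspend() below): optionally halt the GX side, then new GBIF
 * client requests, then the AXI arbiter, polling for each ACK. Pre-GBIF
 * (VBIF-only) parts and the A619_holi variant use their own halt
 * registers instead.
 */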
1777 void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
1778 {
1779 struct msm_gpu *gpu = &adreno_gpu->base;
1780
1781 if (adreno_is_a619_holi(adreno_gpu)) {
1782 gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, GPR0_GBIF_HALT_REQUEST);
1783 spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) &
1784 (VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK);
1785 } else if (!a6xx_has_gbif(adreno_gpu)) {
1786 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, VBIF_XIN_HALT_CTRL0_MASK);
1787 spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
1788 (VBIF_XIN_HALT_CTRL0_MASK)) == VBIF_XIN_HALT_CTRL0_MASK);
1789 gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
1790
1791 return;
1792 }
1793
1794 if (gx_off) {
1795 /* Halt the gx side of GBIF */
1796 gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1);
1797 spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1);
1798 }
1799
1800 /* Halt new client requests on GBIF */
1801 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
1802 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
1803 (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
1804
1805 /* Halt all AXI requests on GBIF */
1806 gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
1807 spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
1808 (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
1809
1810 /* The GBIF halt needs to be explicitly cleared */
1811 gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
1812 }
1813
1814 void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert)
1815 {
1816 /* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */
1817 if (adreno_is_a610(to_adreno_gpu(gpu)))
1818 return;
1819
1820 gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert);
1821 /* Perform a bogus read and add a brief delay to ensure ordering. */
1822 gpu_read(gpu, REG_A6XX_RBBM_SW_RESET_CMD);
1823 udelay(1);
1824
1825 /* The reset line needs to be asserted for at least 100 us */
1826 if (assert)
1827 udelay(100);
1828 }
1829
1830 static int a6xx_gmu_pm_resume(struct msm_gpu *gpu)
1831 {
1832 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1833 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1834 int ret;
1835
1836 gpu->needs_hw_init = true;
1837
1838 trace_msm_gpu_resume(0);
1839
1840 mutex_lock(&a6xx_gpu->gmu.lock);
1841 ret = a6xx_gmu_resume(a6xx_gpu);
1842 mutex_unlock(&a6xx_gpu->gmu.lock);
1843 if (ret)
1844 return ret;
1845
1846 msm_devfreq_resume(gpu);
1847
1848 adreno_is_a7xx(adreno_gpu) ? a7xx_llc_activate(a6xx_gpu) : a6xx_llc_activate(a6xx_gpu);
1849
1850 return ret;
1851 }
1852
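/*
 * Resume path for GMU-wrapper GPUs (see funcs_gmuwrapper below): with no
 * GMU firmware to do it for us, take the OPP, power-domain and clock
 * votes directly from the kernel.
 */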
1853 static int a6xx_pm_resume(struct msm_gpu *gpu)
1854 {
1855 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1856 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1857 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1858 unsigned long freq = gpu->fast_rate;
1859 struct dev_pm_opp *opp;
1860 int ret;
1861
1862 gpu->needs_hw_init = true;
1863
1864 trace_msm_gpu_resume(0);
1865
1866 mutex_lock(&a6xx_gpu->gmu.lock);
1867
1868 opp = dev_pm_opp_find_freq_ceil(&gpu->pdev->dev, &freq);
1869 if (IS_ERR(opp)) {
1870 ret = PTR_ERR(opp);
1871 goto err_set_opp;
1872 }
1873 dev_pm_opp_put(opp);
1874
1875 /* Set the core clock and bus bandwidth, keeping VDD scaling in mind */
1876 dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
1877
1878 pm_runtime_resume_and_get(gmu->dev);
1879 pm_runtime_resume_and_get(gmu->gxpd);
1880
1881 ret = clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
1882 if (ret)
1883 goto err_bulk_clk;
1884
1885 if (adreno_is_a619_holi(adreno_gpu))
1886 a6xx_sptprac_enable(gmu);
1887
1888 /* If anything goes south, tear the GPU down piece by piece.. */
1889 if (ret) {
1890 err_bulk_clk:
1891 pm_runtime_put(gmu->gxpd);
1892 pm_runtime_put(gmu->dev);
1893 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
1894 }
1895 err_set_opp:
1896 mutex_unlock(&a6xx_gpu->gmu.lock);
1897
1898 if (!ret)
1899 msm_devfreq_resume(gpu);
1900
1901 return ret;
1902 }
1903
1904 static int a6xx_gmu_pm_suspend(struct msm_gpu *gpu)
1905 {
1906 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1907 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1908 int i, ret;
1909
1910 trace_msm_gpu_suspend(0);
1911
1912 a6xx_llc_deactivate(a6xx_gpu);
1913
1914 msm_devfreq_suspend(gpu);
1915
1916 mutex_lock(&a6xx_gpu->gmu.lock);
1917 ret = a6xx_gmu_stop(a6xx_gpu);
1918 mutex_unlock(&a6xx_gpu->gmu.lock);
1919 if (ret)
1920 return ret;
1921
1922 if (a6xx_gpu->shadow_bo)
1923 for (i = 0; i < gpu->nr_rings; i++)
1924 a6xx_gpu->shadow[i] = 0;
1925
1926 gpu->suspend_count++;
1927
1928 return 0;
1929 }
1930
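/*
 * Suspend counterpart for GMU-wrapper GPUs: drain bus traffic, disable
 * SPTPRAC on A619_holi, then drop the clock, power-domain and OPP votes
 * in roughly the reverse order of a6xx_pm_resume().
 */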
1931 static int a6xx_pm_suspend(struct msm_gpu *gpu)
1932 {
1933 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1934 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1935 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1936 int i;
1937
1938 trace_msm_gpu_suspend(0);
1939
1940 msm_devfreq_suspend(gpu);
1941
1942 mutex_lock(&a6xx_gpu->gmu.lock);
1943
1944 /* Drain the outstanding traffic on memory buses */
1945 a6xx_bus_clear_pending_transactions(adreno_gpu, true);
1946
1947 if (adreno_is_a619_holi(adreno_gpu))
1948 a6xx_sptprac_disable(gmu);
1949
1950 clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
1951
1952 pm_runtime_put_sync(gmu->gxpd);
1953 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
1954 pm_runtime_put_sync(gmu->dev);
1955
1956 mutex_unlock(&a6xx_gpu->gmu.lock);
1957
1958 if (a6xx_gpu->shadow_bo)
1959 for (i = 0; i < gpu->nr_rings; i++)
1960 a6xx_gpu->shadow[i] = 0;
1961
1962 gpu->suspend_count++;
1963
1964 return 0;
1965 }
1966
1967 static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1968 {
1969 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1970 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1971
1972 mutex_lock(&a6xx_gpu->gmu.lock);
1973
1974 /* Force the GPU power on so we can read this register */
1975 a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
1976
1977 *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
1978
1979 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
1980
1981 mutex_unlock(&a6xx_gpu->gmu.lock);
1982
1983 return 0;
1984 }
1985
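/*
 * GMU-wrapper variant: there is no GMU to take an OOB vote against, so
 * (assuming the caller holds a runtime PM reference) the always-on
 * counter can be read directly.
 */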
1986 static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1987 {
1988 *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
1989 return 0;
1990 }
1991
1992 static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
1993 {
1994 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1995 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1996
1997 return a6xx_gpu->cur_ring;
1998 }
1999
2000 static void a6xx_destroy(struct msm_gpu *gpu)
2001 {
2002 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2003 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2004
2005 if (a6xx_gpu->sqe_bo) {
2006 msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
2007 drm_gem_object_put(a6xx_gpu->sqe_bo);
2008 }
2009
2010 if (a6xx_gpu->shadow_bo) {
2011 msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->aspace);
2012 drm_gem_object_put(a6xx_gpu->shadow_bo);
2013 }
2014
2015 a6xx_llc_slices_destroy(a6xx_gpu);
2016
2017 a6xx_gmu_remove(a6xx_gpu);
2018
2019 adreno_gpu_cleanup(adreno_gpu);
2020
2021 kfree(a6xx_gpu);
2022 }
2023
2024 static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
2025 {
2026 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2027 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2028 u64 busy_cycles;
2029
2030 /* 19.2MHz */
2031 *out_sample_rate = 19200000;
2032
2033 busy_cycles = gmu_read64(&a6xx_gpu->gmu,
2034 REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
2035 REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
2036
2037 return busy_cycles;
2038 }
2039
2040 static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
2041 bool suspended)
2042 {
2043 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2044 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2045
2046 mutex_lock(&a6xx_gpu->gmu.lock);
2047 a6xx_gmu_set_freq(gpu, opp, suspended);
2048 mutex_unlock(&a6xx_gpu->gmu.lock);
2049 }
2050
2051 static struct msm_gem_address_space *
2052 a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
2053 {
2054 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2055 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2056 unsigned long quirks = 0;
2057
2058 /*
2059 * This allows the GPU to set the bus attributes required to use system
2060 * cache on behalf of the iommu page table walker.
2061 */
2062 if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice) &&
2063 !device_iommu_capable(&pdev->dev, IOMMU_CAP_CACHE_COHERENCY))
2064 quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
2065
2066 return adreno_iommu_create_address_space(gpu, pdev, quirks);
2067 }
2068
2069 static struct msm_gem_address_space *
2070 a6xx_create_private_address_space(struct msm_gpu *gpu)
2071 {
2072 struct msm_mmu *mmu;
2073
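/*
 * Per-process pagetable allocated under the GPU's global MMU; the
 * private VA space starts at 0x100000000 and spans the size reported by
 * adreno_private_address_space_size().
 */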
2074 mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
2075
2076 if (IS_ERR(mmu))
2077 return ERR_CAST(mmu);
2078
2079 return msm_gem_address_space_create(mmu,
2080 "gpu", 0x100000000ULL,
2081 adreno_private_address_space_size(gpu));
2082 }
2083
2084 static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2085 {
2086 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2087 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2088
2089 if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
2090 return a6xx_gpu->shadow[ring->id];
2091
2092 return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
2093 }
2094
2095 static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2096 {
2097 struct msm_cp_state cp_state = {
2098 .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
2099 .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
2100 .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
2101 .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
2102 };
2103 bool progress;
2104
2105 /*
2106 * Adjust the remaining data to account for what has already been
2107 * fetched from memory, but not yet consumed by the SQE.
2108 *
2109 * This is not *technically* correct, the amount buffered could
2110 * exceed the IB size due to hw prefetching ahead, but:
2111 *
2112 * (1) We aren't trying to find the exact position, just whether
2113 * progress has been made
2114 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
2115 * to prevent prefetching into an unrelated submit. (And
2116 * either way, at some point the ROQ will be full.)
2117 */
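/*
 * The amount already buffered in the ROQ for each IB appears to live in
 * the upper 16 bits of the ROQ_AVAIL registers, hence the shift before
 * adding it back onto the remaining size.
 */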
2118 cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB1) >> 16;
2119 cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB2) >> 16;
2120
2121 progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
2122
2123 ring->last_cp_state = cp_state;
2124
2125 return progress;
2126 }
2127
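/*
 * Map the raw speed-bin fuse value to a supported-hardware mask for the
 * OPP core. Speedbin tables are terminated by a SHRT_MAX fuse entry; an
 * unknown fuse yields UINT_MAX so the caller can fall back to the
 * default bin.
 */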
2128 static u32 fuse_to_supp_hw(const struct adreno_info *info, u32 fuse)
2129 {
2130 if (!info->speedbins)
2131 return UINT_MAX;
2132
2133 for (int i = 0; info->speedbins[i].fuse != SHRT_MAX; i++)
2134 if (info->speedbins[i].fuse == fuse)
2135 return BIT(info->speedbins[i].speedbin);
2136
2137 return UINT_MAX;
2138 }
2139
2140 static int a6xx_set_supported_hw(struct device *dev, const struct adreno_info *info)
2141 {
2142 u32 supp_hw;
2143 u32 speedbin;
2144 int ret;
2145
2146 ret = adreno_read_speedbin(dev, &speedbin);
2147 /*
2148 * -ENOENT means that the platform doesn't support speedbin, which
2149 * is fine
2150 */
2151 if (ret == -ENOENT) {
2152 return 0;
2153 } else if (ret) {
2154 dev_err_probe(dev, ret,
2155 "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
2156 return ret;
2157 }
2158
2159 supp_hw = fuse_to_supp_hw(info, speedbin);
2160
2161 if (supp_hw == UINT_MAX) {
2162 DRM_DEV_ERROR(dev,
2163 "missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
2164 speedbin);
2165 supp_hw = BIT(0); /* Default */
2166 }
2167
2168 ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
2169 if (ret)
2170 return ret;
2171
2172 return 0;
2173 }
2174
2175 static const struct adreno_gpu_funcs funcs = {
2176 .base = {
2177 .get_param = adreno_get_param,
2178 .set_param = adreno_set_param,
2179 .hw_init = a6xx_hw_init,
2180 .ucode_load = a6xx_ucode_load,
2181 .pm_suspend = a6xx_gmu_pm_suspend,
2182 .pm_resume = a6xx_gmu_pm_resume,
2183 .recover = a6xx_recover,
2184 .submit = a6xx_submit,
2185 .active_ring = a6xx_active_ring,
2186 .irq = a6xx_irq,
2187 .destroy = a6xx_destroy,
2188 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2189 .show = a6xx_show,
2190 #endif
2191 .gpu_busy = a6xx_gpu_busy,
2192 .gpu_get_freq = a6xx_gmu_get_freq,
2193 .gpu_set_freq = a6xx_gpu_set_freq,
2194 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2195 .gpu_state_get = a6xx_gpu_state_get,
2196 .gpu_state_put = a6xx_gpu_state_put,
2197 #endif
2198 .create_address_space = a6xx_create_address_space,
2199 .create_private_address_space = a6xx_create_private_address_space,
2200 .get_rptr = a6xx_get_rptr,
2201 .progress = a6xx_progress,
2202 },
2203 .get_timestamp = a6xx_gmu_get_timestamp,
2204 };
2205
2206 static const struct adreno_gpu_funcs funcs_gmuwrapper = {
2207 .base = {
2208 .get_param = adreno_get_param,
2209 .set_param = adreno_set_param,
2210 .hw_init = a6xx_hw_init,
2211 .ucode_load = a6xx_ucode_load,
2212 .pm_suspend = a6xx_pm_suspend,
2213 .pm_resume = a6xx_pm_resume,
2214 .recover = a6xx_recover,
2215 .submit = a6xx_submit,
2216 .active_ring = a6xx_active_ring,
2217 .irq = a6xx_irq,
2218 .destroy = a6xx_destroy,
2219 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2220 .show = a6xx_show,
2221 #endif
2222 .gpu_busy = a6xx_gpu_busy,
2223 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2224 .gpu_state_get = a6xx_gpu_state_get,
2225 .gpu_state_put = a6xx_gpu_state_put,
2226 #endif
2227 .create_address_space = a6xx_create_address_space,
2228 .create_private_address_space = a6xx_create_private_address_space,
2229 .get_rptr = a6xx_get_rptr,
2230 .progress = a6xx_progress,
2231 },
2232 .get_timestamp = a6xx_get_timestamp,
2233 };
2234
2235 static const struct adreno_gpu_funcs funcs_a7xx = {
2236 .base = {
2237 .get_param = adreno_get_param,
2238 .set_param = adreno_set_param,
2239 .hw_init = a6xx_hw_init,
2240 .ucode_load = a6xx_ucode_load,
2241 .pm_suspend = a6xx_gmu_pm_suspend,
2242 .pm_resume = a6xx_gmu_pm_resume,
2243 .recover = a6xx_recover,
2244 .submit = a7xx_submit,
2245 .active_ring = a6xx_active_ring,
2246 .irq = a6xx_irq,
2247 .destroy = a6xx_destroy,
2248 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2249 .show = a6xx_show,
2250 #endif
2251 .gpu_busy = a6xx_gpu_busy,
2252 .gpu_get_freq = a6xx_gmu_get_freq,
2253 .gpu_set_freq = a6xx_gpu_set_freq,
2254 #if defined(CONFIG_DRM_MSM_GPU_STATE)
2255 .gpu_state_get = a6xx_gpu_state_get,
2256 .gpu_state_put = a6xx_gpu_state_put,
2257 #endif
2258 .create_address_space = a6xx_create_address_space,
2259 .create_private_address_space = a6xx_create_private_address_space,
2260 .get_rptr = a6xx_get_rptr,
2261 .progress = a6xx_progress,
2262 },
2263 .get_timestamp = a6xx_gmu_get_timestamp,
2264 };
2265
2266 struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
2267 {
2268 struct msm_drm_private *priv = dev->dev_private;
2269 struct platform_device *pdev = priv->gpu_pdev;
2270 struct adreno_platform_config *config = pdev->dev.platform_data;
2271 struct device_node *node;
2272 struct a6xx_gpu *a6xx_gpu;
2273 struct adreno_gpu *adreno_gpu;
2274 struct msm_gpu *gpu;
2275 bool is_a7xx;
2276 int ret;
2277
2278 a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL);
2279 if (!a6xx_gpu)
2280 return ERR_PTR(-ENOMEM);
2281
2282 adreno_gpu = &a6xx_gpu->base;
2283 gpu = &adreno_gpu->base;
2284
2285 mutex_init(&a6xx_gpu->gmu.lock);
2286
2287 adreno_gpu->registers = NULL;
2288
2289 /* Check if there is a GMU phandle and set it up */
2290 node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
2291 /* FIXME: How do we gracefully handle this? */
2292 BUG_ON(!node);
2293
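/*
 * "GMU wrapper" devices (no GMU firmware to manage power) take the
 * funcs_gmuwrapper paths above, where the kernel drives clocks and
 * power domains itself.
 */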
2294 adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper");
2295
2296 adreno_gpu->base.hw_apriv =
2297 !!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV);
2298
2299 /* gpu->info only gets assigned in adreno_gpu_init() */
2300 is_a7xx = config->info->family == ADRENO_7XX_GEN1 ||
2301 config->info->family == ADRENO_7XX_GEN2 ||
2302 config->info->family == ADRENO_7XX_GEN3;
2303
2304 a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx);
2305
2306 ret = a6xx_set_supported_hw(&pdev->dev, config->info);
2307 if (ret) {
2308 a6xx_llc_slices_destroy(a6xx_gpu);
2309 kfree(a6xx_gpu);
2310 return ERR_PTR(ret);
2311 }
2312
2313 if (is_a7xx)
2314 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 1);
2315 else if (adreno_has_gmu_wrapper(adreno_gpu))
2316 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_gmuwrapper, 1);
2317 else
2318 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
2319 if (ret) {
2320 a6xx_destroy(&(a6xx_gpu->base.base));
2321 return ERR_PTR(ret);
2322 }
2323
2324 /*
2325 * For now only clamp to idle freq for devices where this is known not
2326 * to cause power supply issues:
2327 */
2328 if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
2329 priv->gpu_clamp_to_idle = true;
2330
2331 if (adreno_has_gmu_wrapper(adreno_gpu))
2332 ret = a6xx_gmu_wrapper_init(a6xx_gpu, node);
2333 else
2334 ret = a6xx_gmu_init(a6xx_gpu, node);
2335 of_node_put(node);
2336 if (ret) {
2337 a6xx_destroy(&(a6xx_gpu->base.base));
2338 return ERR_PTR(ret);
2339 }
2340
2341 if (adreno_is_a7xx(adreno_gpu)) {
2342 ret = a7xx_cx_mem_init(a6xx_gpu);
2343 if (ret) {
2344 a6xx_destroy(&(a6xx_gpu->base.base));
2345 return ERR_PTR(ret);
2346 }
2347 }
2348
2349 if (gpu->aspace)
2350 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
2351 a6xx_fault_handler);
2352
2353 a6xx_calc_ubwc_config(adreno_gpu);
2354
2355 return gpu;
2356 }
2357