/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"

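/*
 * Two firmware naming schemes are declared below: the UPPERCASE names are
 * the legacy images, while the lowercase names are the newer images that
 * use the common ucode header format from radeon_ucode.h.  At init time
 * the driver (cik_init_microcode()) requests the newer images first and
 * falls back to the legacy ones if they are not available.
 */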
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

/*
 * Indirect register accessors
 */
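/*
 * The DIDT block is not directly mapped; it is reached through an
 * index/data register pair (CIK_DIDT_IND_INDEX/CIK_DIDT_IND_DATA).
 * The didt_idx_lock spinlock keeps the index write and the matching
 * data access atomic with respect to other users of the pair.
 */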
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}

void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

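/*
 * RLC save/restore register lists.  Each entry appears to pack a
 * GRBM_GFX_INDEX-style instance selector in the upper 16 bits and a
 * register dword offset (byte offset >> 2) in the lower 16 bits; the
 * 0x00000000 dword that follows each entry looks like a placeholder
 * for the value the RLC saves there, and the bare counts (0x3, 0x5)
 * appear to delimit runs of registers handled as a block.
 */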
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

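/*
 * "Golden" register tables.  Entries are {offset, mask, value} triplets
 * consumed by radeon_program_register_sequence(): the bits covered by
 * the mask are cleared and the value is OR'd in, while a mask of
 * 0xffffffff writes the value directly.
 */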
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};


static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

1746 /**
1747  * cik_mm_wdoorbell - write a doorbell dword
1748  *
1749  * @rdev: radeon_device pointer
1750  * @index: doorbell index
1751  * @v: value to write
1752  *
1753  * Writes @v to the doorbell aperture at the
1754  * requested doorbell index (CIK).
1755  */
1756 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1757 {
1758 	if (index < rdev->doorbell.num_doorbells) {
1759 		writel(v, rdev->doorbell.ptr + index);
1760 	} else {
1761 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1762 	}
1763 }
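/*
 * A minimal usage sketch: these helpers back the RDOORBELL32()/
 * WDOORBELL32() accessors, so a ring update such as
 *
 *	WDOORBELL32(ring->doorbell_index, ring->wptr);
 *
 * lands in cik_mm_wdoorbell() and picks up the bounds check above.
 */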
1764 
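/*
 * Each row below is an {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pair that ci_mc_load_microcode() writes out before programming the
 * MC ucode itself.
 */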
1765 #define BONAIRE_IO_MC_REGS_SIZE 36
1766 
1767 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1768 {
1769 	{0x00000070, 0x04400000},
1770 	{0x00000071, 0x80c01803},
1771 	{0x00000072, 0x00004004},
1772 	{0x00000073, 0x00000100},
1773 	{0x00000074, 0x00ff0000},
1774 	{0x00000075, 0x34000000},
1775 	{0x00000076, 0x08000014},
1776 	{0x00000077, 0x00cc08ec},
1777 	{0x00000078, 0x00000400},
1778 	{0x00000079, 0x00000000},
1779 	{0x0000007a, 0x04090000},
1780 	{0x0000007c, 0x00000000},
1781 	{0x0000007e, 0x4408a8e8},
1782 	{0x0000007f, 0x00000304},
1783 	{0x00000080, 0x00000000},
1784 	{0x00000082, 0x00000001},
1785 	{0x00000083, 0x00000002},
1786 	{0x00000084, 0xf3e4f400},
1787 	{0x00000085, 0x052024e3},
1788 	{0x00000087, 0x00000000},
1789 	{0x00000088, 0x01000000},
1790 	{0x0000008a, 0x1c0a0000},
1791 	{0x0000008b, 0xff010000},
1792 	{0x0000008d, 0xffffefff},
1793 	{0x0000008e, 0xfff3efff},
1794 	{0x0000008f, 0xfff3efbf},
1795 	{0x00000092, 0xf7ffffff},
1796 	{0x00000093, 0xffffff7f},
1797 	{0x00000095, 0x00101101},
1798 	{0x00000096, 0x00000fff},
1799 	{0x00000097, 0x00116fff},
1800 	{0x00000098, 0x60010000},
1801 	{0x00000099, 0x10010000},
1802 	{0x0000009a, 0x00006000},
1803 	{0x0000009b, 0x00001000},
1804 	{0x0000009f, 0x00b48000}
1805 };
1806 
1807 #define HAWAII_IO_MC_REGS_SIZE 22
1808 
1809 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1810 {
1811 	{0x0000007d, 0x40000000},
1812 	{0x0000007e, 0x40180304},
1813 	{0x0000007f, 0x0000ff00},
1814 	{0x00000081, 0x00000000},
1815 	{0x00000083, 0x00000800},
1816 	{0x00000086, 0x00000000},
1817 	{0x00000087, 0x00000100},
1818 	{0x00000088, 0x00020100},
1819 	{0x00000089, 0x00000000},
1820 	{0x0000008b, 0x00040000},
1821 	{0x0000008c, 0x00000100},
1822 	{0x0000008e, 0xff010000},
1823 	{0x00000090, 0xffffefff},
1824 	{0x00000091, 0xfff3efff},
1825 	{0x00000092, 0xfff3efbf},
1826 	{0x00000093, 0xf7ffffff},
1827 	{0x00000094, 0xffffff7f},
1828 	{0x00000095, 0x00000fff},
1829 	{0x00000096, 0x00116fff},
1830 	{0x00000097, 0x60010000},
1831 	{0x00000098, 0x10010000},
1832 	{0x0000009f, 0x00c79000}
1833 };
1834 
1836 /**
1837  * cik_srbm_select - select specific register instances
1838  *
1839  * @rdev: radeon_device pointer
1840  * @me: selected ME (micro engine)
1841  * @pipe: pipe
1842  * @queue: queue
1843  * @vmid: VMID
1844  *
1845  * Switches the currently active register instances.  Some
1846  * registers are instanced per VMID, others are instanced per
1847  * me/pipe/queue combination.
1848  */
1849 static void cik_srbm_select(struct radeon_device *rdev,
1850 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1851 {
1852 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1853 			     MEID(me & 0x3) |
1854 			     VMID(vmid & 0xf) |
1855 			     QUEUEID(queue & 0x7));
1856 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1857 }
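/*
 * A minimal usage sketch: callers bracket access to instanced registers
 * with a select/deselect pair under rdev->srbm_mutex, e.g. when
 * programming a compute queue's CP_HQD_* registers:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program the per-queue registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */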
1858 
1859 /* ucode loading */
1860 /**
1861  * ci_mc_load_microcode - load MC ucode into the hw
1862  *
1863  * @rdev: radeon_device pointer
1864  *
1865  * Load the GDDR MC ucode into the hw (CIK).
1866  * Returns 0 on success, error on failure.
1867  */
1868 int ci_mc_load_microcode(struct radeon_device *rdev)
1869 {
1870 	const __be32 *fw_data = NULL;
1871 	const __le32 *new_fw_data = NULL;
1872 	u32 running, tmp;
1873 	u32 *io_mc_regs = NULL;
1874 	const __le32 *new_io_mc_regs = NULL;
1875 	int i, regs_size, ucode_size;
1876 
1877 	if (!rdev->mc_fw)
1878 		return -EINVAL;
1879 
1880 	if (rdev->new_fw) {
1881 		const struct mc_firmware_header_v1_0 *hdr =
1882 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1883 
1884 		radeon_ucode_print_mc_hdr(&hdr->header);
1885 
1886 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1887 		new_io_mc_regs = (const __le32 *)
1888 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1889 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1890 		new_fw_data = (const __le32 *)
1891 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1892 	} else {
1893 		ucode_size = rdev->mc_fw->size / 4;
1894 
1895 		switch (rdev->family) {
1896 		case CHIP_BONAIRE:
1897 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1898 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1899 			break;
1900 		case CHIP_HAWAII:
1901 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1902 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1903 			break;
1904 		default:
1905 			return -EINVAL;
1906 		}
1907 		fw_data = (const __be32 *)rdev->mc_fw->data;
1908 	}
1909 
1910 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1911 
1912 	if (running == 0) {
1917 
1918 		/* reset the engine and set to writable */
1919 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921 
1922 		/* load mc io regs */
1923 		for (i = 0; i < regs_size; i++) {
1924 			if (rdev->new_fw) {
1925 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927 			} else {
1928 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930 			}
1931 		}
1932 
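		/* board-specific MC tweak, keyed on PCI device 0x6649 (a
		 * Bonaire-based FirePro board, by all appearances) and an
		 * MC_SEQ_MISC0 field */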
1933 		tmp = RREG32(MC_SEQ_MISC0);
1934 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939 		}
1940 
1941 		/* load the MC ucode */
1942 		for (i = 0; i < ucode_size; i++) {
1943 			if (rdev->new_fw)
1944 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945 			else
1946 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947 		}
1948 
1949 		/* put the engine back into the active state */
1950 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953 
1954 		/* wait for training to complete */
1955 		for (i = 0; i < rdev->usec_timeout; i++) {
1956 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957 				break;
1958 			udelay(1);
1959 		}
1960 		for (i = 0; i < rdev->usec_timeout; i++) {
1961 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962 				break;
1963 			udelay(1);
1964 		}
1968 	}
1969 
1970 	return 0;
1971 }
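/*
 * A minimal usage sketch: the startup path calls this before the MC is
 * programmed, roughly as
 *
 *	r = ci_mc_load_microcode(rdev);
 *	if (r)
 *		DRM_ERROR("Failed to load MC firmware!\n");
 */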
1972 
1973 /**
1974  * cik_init_microcode - load ucode images from disk
1975  *
1976  * @rdev: radeon_device pointer
1977  *
1978  * Use the firmware interface to load the ucode images into
1979  * the driver (not loaded into hw).
1980  * Returns 0 on success, error on failure.
1981  */
1982 static int cik_init_microcode(struct radeon_device *rdev)
1983 {
1984 	const char *chip_name;
1985 	const char *new_chip_name;
1986 	size_t pfp_req_size, me_req_size, ce_req_size,
1987 		mec_req_size, rlc_req_size, mc_req_size = 0,
1988 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1989 	char fw_name[30];
1990 	int new_fw = 0;
1991 	int err;
1992 	int num_fw;
1993 
1994 	DRM_DEBUG("\n");
1995 
1996 	switch (rdev->family) {
1997 	case CHIP_BONAIRE:
1998 		chip_name = "BONAIRE";
1999 		new_chip_name = "bonaire";
2000 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2001 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2002 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2003 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2004 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2005 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2006 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2007 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2008 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2009 		num_fw = 8;
2010 		break;
2011 	case CHIP_HAWAII:
2012 		chip_name = "HAWAII";
2013 		new_chip_name = "hawaii";
2014 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2016 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023 		num_fw = 8;
2024 		break;
2025 	case CHIP_KAVERI:
2026 		chip_name = "KAVERI";
2027 		new_chip_name = "kaveri";
2028 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2030 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034 		num_fw = 7;
2035 		break;
2036 	case CHIP_KABINI:
2037 		chip_name = "KABINI";
2038 		new_chip_name = "kabini";
2039 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2041 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045 		num_fw = 6;
2046 		break;
2047 	case CHIP_MULLINS:
2048 		chip_name = "MULLINS";
2049 		new_chip_name = "mullins";
2050 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2052 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056 		num_fw = 6;
2057 		break;
2058 	default:
		BUG();
2059 	}
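	/* num_fw is the number of images the chip is expected to provide
	 * in the new unified format; it is compared below against the
	 * count of validated new-style images to reject a mix of new-
	 * and old-format firmware. */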
2060 
2061 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062 
2063 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065 	if (err) {
2066 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068 		if (err)
2069 			goto out;
2070 		if (rdev->pfp_fw->size != pfp_req_size) {
2071 			printk(KERN_ERR
2072 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2073 			       rdev->pfp_fw->size, fw_name);
2074 			err = -EINVAL;
2075 			goto out;
2076 		}
2077 	} else {
2078 		err = radeon_ucode_validate(rdev->pfp_fw);
2079 		if (err) {
2080 			printk(KERN_ERR
2081 			       "cik_fw: validation failed for firmware \"%s\"\n",
2082 			       fw_name);
2083 			goto out;
2084 		} else {
2085 			new_fw++;
2086 		}
2087 	}
2088 
2089 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2090 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2091 	if (err) {
2092 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2093 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2094 		if (err)
2095 			goto out;
2096 		if (rdev->me_fw->size != me_req_size) {
2097 			printk(KERN_ERR
2098 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099 			       rdev->me_fw->size, fw_name);
2100 			err = -EINVAL;
			goto out;
2101 		}
2102 	} else {
2103 		err = radeon_ucode_validate(rdev->me_fw);
2104 		if (err) {
2105 			printk(KERN_ERR
2106 			       "cik_fw: validation failed for firmware \"%s\"\n",
2107 			       fw_name);
2108 			goto out;
2109 		} else {
2110 			new_fw++;
2111 		}
2112 	}
2113 
2114 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116 	if (err) {
2117 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119 		if (err)
2120 			goto out;
2121 		if (rdev->ce_fw->size != ce_req_size) {
2122 			printk(KERN_ERR
2123 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124 			       rdev->ce_fw->size, fw_name);
2125 			err = -EINVAL;
			goto out;
2126 		}
2127 	} else {
2128 		err = radeon_ucode_validate(rdev->ce_fw);
2129 		if (err) {
2130 			printk(KERN_ERR
2131 			       "cik_fw: validation failed for firmware \"%s\"\n",
2132 			       fw_name);
2133 			goto out;
2134 		} else {
2135 			new_fw++;
2136 		}
2137 	}
2138 
2139 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2140 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141 	if (err) {
2142 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2143 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2144 		if (err)
2145 			goto out;
2146 		if (rdev->mec_fw->size != mec_req_size) {
2147 			printk(KERN_ERR
2148 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2149 			       rdev->mec_fw->size, fw_name);
2150 			err = -EINVAL;
			goto out;
2151 		}
2152 	} else {
2153 		err = radeon_ucode_validate(rdev->mec_fw);
2154 		if (err) {
2155 			printk(KERN_ERR
2156 			       "cik_fw: validation failed for firmware \"%s\"\n",
2157 			       fw_name);
2158 			goto out;
2159 		} else {
2160 			new_fw++;
2161 		}
2162 	}
2163 
2164 	if (rdev->family == CHIP_KAVERI) {
2165 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2166 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2167 		if (err) {
2168 			goto out;
2169 		} else {
2170 			err = radeon_ucode_validate(rdev->mec2_fw);
2171 			if (err) {
2172 				goto out;
2173 			} else {
2174 				new_fw++;
2175 			}
2176 		}
2177 	}
2178 
2179 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2180 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2181 	if (err) {
2182 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2183 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2184 		if (err)
2185 			goto out;
2186 		if (rdev->rlc_fw->size != rlc_req_size) {
2187 			printk(KERN_ERR
2188 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2189 			       rdev->rlc_fw->size, fw_name);
2190 			err = -EINVAL;
			goto out;
2191 		}
2192 	} else {
2193 		err = radeon_ucode_validate(rdev->rlc_fw);
2194 		if (err) {
2195 			printk(KERN_ERR
2196 			       "cik_fw: validation failed for firmware \"%s\"\n",
2197 			       fw_name);
2198 			goto out;
2199 		} else {
2200 			new_fw++;
2201 		}
2202 	}
2203 
2204 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2205 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2206 	if (err) {
2207 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2208 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2209 		if (err)
2210 			goto out;
2211 		if (rdev->sdma_fw->size != sdma_req_size) {
2212 			printk(KERN_ERR
2213 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2214 			       rdev->sdma_fw->size, fw_name);
2215 			err = -EINVAL;
			goto out;
2216 		}
2217 	} else {
2218 		err = radeon_ucode_validate(rdev->sdma_fw);
2219 		if (err) {
2220 			printk(KERN_ERR
2221 			       "cik_fw: validation failed for firmware \"%s\"\n",
2222 			       fw_name);
2223 			goto out;
2224 		} else {
2225 			new_fw++;
2226 		}
2227 	}
2228 
2229 	/* No SMC, MC ucode on APUs */
2230 	if (!(rdev->flags & RADEON_IS_IGP)) {
2231 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2232 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2233 		if (err) {
2234 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2235 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2236 			if (err) {
2237 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2238 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2239 				if (err)
2240 					goto out;
2241 			}
2242 			if ((rdev->mc_fw->size != mc_req_size) &&
2243 			    (rdev->mc_fw->size != mc2_req_size)) {
2244 				printk(KERN_ERR
2245 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2246 				       rdev->mc_fw->size, fw_name);
2247 				err = -EINVAL;
				goto out;
2248 			}
2249 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2250 		} else {
2251 			err = radeon_ucode_validate(rdev->mc_fw);
2252 			if (err) {
2253 				printk(KERN_ERR
2254 				       "cik_fw: validation failed for firmware \"%s\"\n",
2255 				       fw_name);
2256 				goto out;
2257 			} else {
2258 				new_fw++;
2259 			}
2260 		}
2261 
2262 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2263 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2264 		if (err) {
2265 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2266 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2267 			if (err) {
2268 				printk(KERN_ERR
2269 				       "smc: error loading firmware \"%s\"\n",
2270 				       fw_name);
2271 				release_firmware(rdev->smc_fw);
2272 				rdev->smc_fw = NULL;
2273 				err = 0;
2274 			} else if (rdev->smc_fw->size != smc_req_size) {
2275 				printk(KERN_ERR
2276 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2277 				       rdev->smc_fw->size, fw_name);
2278 				err = -EINVAL;
2279 			}
2280 		} else {
2281 			err = radeon_ucode_validate(rdev->smc_fw);
2282 			if (err) {
2283 				printk(KERN_ERR
2284 				       "cik_fw: validation failed for firmware \"%s\"\n",
2285 				       fw_name);
2286 				goto out;
2287 			} else {
2288 				new_fw++;
2289 			}
2290 		}
2291 	}
2292 
2293 	if (new_fw == 0) {
2294 		rdev->new_fw = false;
2295 	} else if (new_fw < num_fw) {
2296 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2297 		err = -EINVAL;
2298 	} else {
2299 		rdev->new_fw = true;
2300 	}
2301 
2302 out:
2303 	if (err) {
2304 		if (err != -EINVAL)
2305 			printk(KERN_ERR
2306 			       "cik_cp: Failed to load firmware \"%s\"\n",
2307 			       fw_name);
2308 		release_firmware(rdev->pfp_fw);
2309 		rdev->pfp_fw = NULL;
2310 		release_firmware(rdev->me_fw);
2311 		rdev->me_fw = NULL;
2312 		release_firmware(rdev->ce_fw);
2313 		rdev->ce_fw = NULL;
2314 		release_firmware(rdev->mec_fw);
2315 		rdev->mec_fw = NULL;
2316 		release_firmware(rdev->mec2_fw);
2317 		rdev->mec2_fw = NULL;
2318 		release_firmware(rdev->rlc_fw);
2319 		rdev->rlc_fw = NULL;
2320 		release_firmware(rdev->sdma_fw);
2321 		rdev->sdma_fw = NULL;
2322 		release_firmware(rdev->mc_fw);
2323 		rdev->mc_fw = NULL;
2324 		release_firmware(rdev->smc_fw);
2325 		rdev->smc_fw = NULL;
2326 	}
2327 	return err;
2328 }
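/*
 * A minimal usage sketch: callers treat a failure here as fatal for
 * engine bring-up, e.g.
 *
 *	r = cik_init_microcode(rdev);
 *	if (r) {
 *		DRM_ERROR("Failed to load firmware!\n");
 *		return r;
 *	}
 *
 * Note the naming fallback above: new unified-header images use
 * lowercase names ("radeon/bonaire_pfp.bin"), legacy images use
 * uppercase ("radeon/BONAIRE_pfp.bin").
 */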
2329 
2330 /*
2331  * Core functions
2332  */
2333 /**
2334  * cik_tiling_mode_table_init - init the hw tiling table
2335  *
2336  * @rdev: radeon_device pointer
2337  *
2338  * Starting with SI, the tiling setup is done globally in a
2339  * set of 32 tiling modes.  Rather than selecting each set of
2340  * parameters per surface as on older asics, we just select
2341  * which index in the tiling table we want to use, and the
2342  * surface uses those parameters (CIK).
2343  */
2344 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2345 {
2346 	const u32 num_tile_mode_states = 32;
2347 	const u32 num_secondary_tile_mode_states = 16;
2348 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2349 	u32 num_pipe_configs;
2350 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2351 		rdev->config.cik.max_shader_engines;
2352 
2353 	switch (rdev->config.cik.mem_row_size_in_kb) {
2354 	case 1:
2355 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2356 		break;
2357 	case 2:
2358 	default:
2359 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2360 		break;
2361 	case 4:
2362 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2363 		break;
2364 	}
2365 
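	/* max_tile_pipes above 8 only occurs on the 16-pipe (Hawaii)
	 * layout, hence the bump to 16 below */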
2366 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2367 	if (num_pipe_configs > 8)
2368 		num_pipe_configs = 16;
2369 
2370 	if (num_pipe_configs == 16) {
2371 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2372 			switch (reg_offset) {
2373 			case 0:
2374 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2378 				break;
2379 			case 1:
2380 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2384 				break;
2385 			case 2:
2386 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2390 				break;
2391 			case 3:
2392 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2394 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2396 				break;
2397 			case 4:
2398 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2400 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 						 TILE_SPLIT(split_equal_to_row_size));
2402 				break;
2403 			case 5:
2404 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407 				break;
2408 			case 6:
2409 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2410 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2411 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2413 				break;
2414 			case 7:
2415 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2416 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2417 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 						 TILE_SPLIT(split_equal_to_row_size));
2419 				break;
2420 			case 8:
2421 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2422 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2423 				break;
2424 			case 9:
2425 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2428 				break;
2429 			case 10:
2430 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 				break;
2435 			case 11:
2436 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 				break;
2441 			case 12:
2442 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2444 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446 				break;
2447 			case 13:
2448 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2451 				break;
2452 			case 14:
2453 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457 				break;
2458 			case 16:
2459 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2460 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2462 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463 				break;
2464 			case 17:
2465 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2466 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2467 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2469 				break;
2470 			case 27:
2471 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2472 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2473 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2474 				break;
2475 			case 28:
2476 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2477 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2478 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480 				break;
2481 			case 29:
2482 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2483 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2484 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2485 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2486 				break;
2487 			case 30:
2488 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2489 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2490 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2491 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492 				break;
2493 			default:
2494 				gb_tile_moden = 0;
2495 				break;
2496 			}
2497 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2498 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2499 		}
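		/* the 16 "secondary" entries program the macrotile
		 * parameters (bank width/height, macro tile aspect, bank
		 * count) into GB_MACROTILE_MODE0..15 */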
2500 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2501 			switch (reg_offset) {
2502 			case 0:
2503 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2506 						 NUM_BANKS(ADDR_SURF_16_BANK));
2507 				break;
2508 			case 1:
2509 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2511 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512 						 NUM_BANKS(ADDR_SURF_16_BANK));
2513 				break;
2514 			case 2:
2515 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2518 						 NUM_BANKS(ADDR_SURF_16_BANK));
2519 				break;
2520 			case 3:
2521 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 						 NUM_BANKS(ADDR_SURF_16_BANK));
2525 				break;
2526 			case 4:
2527 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2530 						 NUM_BANKS(ADDR_SURF_8_BANK));
2531 				break;
2532 			case 5:
2533 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2535 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2536 						 NUM_BANKS(ADDR_SURF_4_BANK));
2537 				break;
2538 			case 6:
2539 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2541 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2542 						 NUM_BANKS(ADDR_SURF_2_BANK));
2543 				break;
2544 			case 8:
2545 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548 						 NUM_BANKS(ADDR_SURF_16_BANK));
2549 				break;
2550 			case 9:
2551 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554 						 NUM_BANKS(ADDR_SURF_16_BANK));
2555 				break;
2556 			case 10:
2557 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2560 						 NUM_BANKS(ADDR_SURF_16_BANK));
2561 				break;
2562 			case 11:
2563 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566 						 NUM_BANKS(ADDR_SURF_8_BANK));
2567 				break;
2568 			case 12:
2569 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2571 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2572 						 NUM_BANKS(ADDR_SURF_4_BANK));
2573 				break;
2574 			case 13:
2575 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2577 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578 						 NUM_BANKS(ADDR_SURF_2_BANK));
2579 				break;
2580 			case 14:
2581 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2583 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2584 						 NUM_BANKS(ADDR_SURF_2_BANK));
2585 				break;
2586 			default:
2587 				gb_tile_moden = 0;
2588 				break;
2589 			}
2590 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2591 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2592 		}
2593 	} else if (num_pipe_configs == 8) {
2594 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2595 			switch (reg_offset) {
2596 			case 0:
2597 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2599 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2601 				break;
2602 			case 1:
2603 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2605 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2607 				break;
2608 			case 2:
2609 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2611 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2613 				break;
2614 			case 3:
2615 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2617 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2619 				break;
2620 			case 4:
2621 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2623 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624 						 TILE_SPLIT(split_equal_to_row_size));
2625 				break;
2626 			case 5:
2627 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2628 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2630 				break;
2631 			case 6:
2632 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2633 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2634 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2635 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2636 				break;
2637 			case 7:
2638 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2639 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2640 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641 						 TILE_SPLIT(split_equal_to_row_size));
2642 				break;
2643 			case 8:
2644 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2645 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2646 				break;
2647 			case 9:
2648 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2649 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2650 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2651 				break;
2652 			case 10:
2653 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2655 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657 				break;
2658 			case 11:
2659 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2660 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2661 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2663 				break;
2664 			case 12:
2665 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2666 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2667 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2669 				break;
2670 			case 13:
2671 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2672 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2673 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2674 				break;
2675 			case 14:
2676 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2678 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2680 				break;
2681 			case 16:
2682 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2683 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2684 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2686 				break;
2687 			case 17:
2688 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2690 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692 				break;
2693 			case 27:
2694 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2695 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2697 				break;
2698 			case 28:
2699 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2701 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2702 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703 				break;
2704 			case 29:
2705 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2706 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2707 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2709 				break;
2710 			case 30:
2711 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2712 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2713 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2714 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 				break;
2716 			default:
2717 				gb_tile_moden = 0;
2718 				break;
2719 			}
2720 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2721 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2722 		}
2723 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2724 			switch (reg_offset) {
2725 			case 0:
2726 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2729 						 NUM_BANKS(ADDR_SURF_16_BANK));
2730 				break;
2731 			case 1:
2732 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2735 						 NUM_BANKS(ADDR_SURF_16_BANK));
2736 				break;
2737 			case 2:
2738 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2741 						 NUM_BANKS(ADDR_SURF_16_BANK));
2742 				break;
2743 			case 3:
2744 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747 						 NUM_BANKS(ADDR_SURF_16_BANK));
2748 				break;
2749 			case 4:
2750 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2752 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2753 						 NUM_BANKS(ADDR_SURF_8_BANK));
2754 				break;
2755 			case 5:
2756 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2759 						 NUM_BANKS(ADDR_SURF_4_BANK));
2760 				break;
2761 			case 6:
2762 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2765 						 NUM_BANKS(ADDR_SURF_2_BANK));
2766 				break;
2767 			case 8:
2768 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2770 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771 						 NUM_BANKS(ADDR_SURF_16_BANK));
2772 				break;
2773 			case 9:
2774 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2776 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2777 						 NUM_BANKS(ADDR_SURF_16_BANK));
2778 				break;
2779 			case 10:
2780 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783 						 NUM_BANKS(ADDR_SURF_16_BANK));
2784 				break;
2785 			case 11:
2786 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2788 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2789 						 NUM_BANKS(ADDR_SURF_16_BANK));
2790 				break;
2791 			case 12:
2792 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2794 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2795 						 NUM_BANKS(ADDR_SURF_8_BANK));
2796 				break;
2797 			case 13:
2798 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2800 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2801 						 NUM_BANKS(ADDR_SURF_4_BANK));
2802 				break;
2803 			case 14:
2804 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2807 						 NUM_BANKS(ADDR_SURF_2_BANK));
2808 				break;
2809 			default:
2810 				gb_tile_moden = 0;
2811 				break;
2812 			}
2813 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2814 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2815 		}
2816 	} else if (num_pipe_configs == 4) {
2817 		if (num_rbs == 4) {
2818 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2819 				switch (reg_offset) {
2820 				case 0:
2821 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2823 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2825 					break;
2826 				case 1:
2827 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2828 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2829 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2831 					break;
2832 				case 2:
2833 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2835 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2837 					break;
2838 				case 3:
2839 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2841 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2843 					break;
2844 				case 4:
2845 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2847 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848 							 TILE_SPLIT(split_equal_to_row_size));
2849 					break;
2850 				case 5:
2851 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2852 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854 					break;
2855 				case 6:
2856 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2857 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2858 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2860 					break;
2861 				case 7:
2862 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2863 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2864 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 							 TILE_SPLIT(split_equal_to_row_size));
2866 					break;
2867 				case 8:
2868 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2869 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2870 					break;
2871 				case 9:
2872 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2873 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2875 					break;
2876 				case 10:
2877 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2879 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2881 					break;
2882 				case 11:
2883 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2885 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2886 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2887 					break;
2888 				case 12:
2889 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2890 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2891 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2892 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893 					break;
2894 				case 13:
2895 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2896 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2898 					break;
2899 				case 14:
2900 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2902 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2904 					break;
2905 				case 16:
2906 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2907 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2908 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2909 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2910 					break;
2911 				case 17:
2912 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2914 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916 					break;
2917 				case 27:
2918 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2919 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2920 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2921 					break;
2922 				case 28:
2923 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2925 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927 					break;
2928 				case 29:
2929 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2931 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2932 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933 					break;
2934 				case 30:
2935 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2936 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2937 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2938 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939 					break;
2940 				default:
2941 					gb_tile_moden = 0;
2942 					break;
2943 				}
2944 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2945 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2946 			}
2947 		} else if (num_rbs < 4) {
2948 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2949 				switch (reg_offset) {
2950 				case 0:
2951 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2952 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2953 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2954 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2955 					break;
2956 				case 1:
2957 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2958 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2959 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2960 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2961 					break;
2962 				case 2:
2963 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2964 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2965 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2966 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2967 					break;
2968 				case 3:
2969 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2970 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2971 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2972 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2973 					break;
2974 				case 4:
2975 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2976 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2977 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2978 							 TILE_SPLIT(split_equal_to_row_size));
2979 					break;
2980 				case 5:
2981 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2982 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2983 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2984 					break;
2985 				case 6:
2986 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2987 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2988 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2989 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2990 					break;
2991 				case 7:
2992 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2993 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2994 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2995 							 TILE_SPLIT(split_equal_to_row_size));
2996 					break;
2997 				case 8:
2998 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2999 							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
3000 					break;
3001 				case 9:
3002 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3004 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
3005 					break;
3006 				case 10:
3007 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3009 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3010 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011 					break;
3012 				case 11:
3013 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3014 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3015 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3016 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017 					break;
3018 				case 12:
3019 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3020 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3021 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3022 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3023 					break;
3024 				case 13:
3025 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3026 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3027 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3028 					break;
3029 				case 14:
3030 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3031 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3033 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3034 					break;
3035 				case 16:
3036 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3037 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3038 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3039 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3040 					break;
3041 				case 17:
3042 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3043 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3044 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3045 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3046 					break;
3047 				case 27:
3048 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3049 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3050 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3051 					break;
3052 				case 28:
3053 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3054 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3055 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3056 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057 					break;
3058 				case 29:
3059 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3060 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3061 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3062 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3063 					break;
3064 				case 30:
3065 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3066 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3067 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3068 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069 					break;
3070 				default:
3071 					gb_tile_moden = 0;
3072 					break;
3073 				}
3074 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3075 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3076 			}
3077 		}
3078 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3079 			switch (reg_offset) {
3080 			case 0:
3081 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3083 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3084 						 NUM_BANKS(ADDR_SURF_16_BANK));
3085 				break;
3086 			case 1:
3087 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3089 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090 						 NUM_BANKS(ADDR_SURF_16_BANK));
3091 				break;
3092 			case 2:
3093 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3096 						 NUM_BANKS(ADDR_SURF_16_BANK));
3097 				break;
3098 			case 3:
3099 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3100 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3101 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3102 						 NUM_BANKS(ADDR_SURF_16_BANK));
3103 				break;
3104 			case 4:
3105 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3106 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3107 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3108 						 NUM_BANKS(ADDR_SURF_16_BANK));
3109 				break;
3110 			case 5:
3111 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3112 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3113 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3114 						 NUM_BANKS(ADDR_SURF_8_BANK));
3115 				break;
3116 			case 6:
3117 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3119 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3120 						 NUM_BANKS(ADDR_SURF_4_BANK));
3121 				break;
3122 			case 8:
3123 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3124 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3125 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3126 						 NUM_BANKS(ADDR_SURF_16_BANK));
3127 				break;
3128 			case 9:
3129 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3130 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3131 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3132 						 NUM_BANKS(ADDR_SURF_16_BANK));
3133 				break;
3134 			case 10:
3135 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138 						 NUM_BANKS(ADDR_SURF_16_BANK));
3139 				break;
3140 			case 11:
3141 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3142 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3143 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3144 						 NUM_BANKS(ADDR_SURF_16_BANK));
3145 				break;
3146 			case 12:
3147 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150 						 NUM_BANKS(ADDR_SURF_16_BANK));
3151 				break;
3152 			case 13:
3153 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3155 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3156 						 NUM_BANKS(ADDR_SURF_8_BANK));
3157 				break;
3158 			case 14:
3159 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3162 						 NUM_BANKS(ADDR_SURF_4_BANK));
3163 				break;
3164 			default:
3165 				gb_tile_moden = 0;
3166 				break;
3167 			}
3168 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3169 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3170 		}
3171 	} else if (num_pipe_configs == 2) {
3172 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3173 			switch (reg_offset) {
3174 			case 0:
3175 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3177 						 PIPE_CONFIG(ADDR_SURF_P2) |
3178 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3179 				break;
3180 			case 1:
3181 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3182 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3183 						 PIPE_CONFIG(ADDR_SURF_P2) |
3184 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3185 				break;
3186 			case 2:
3187 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3188 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3189 						 PIPE_CONFIG(ADDR_SURF_P2) |
3190 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3191 				break;
3192 			case 3:
3193 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3194 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3195 						 PIPE_CONFIG(ADDR_SURF_P2) |
3196 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3197 				break;
3198 			case 4:
3199 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3201 						 PIPE_CONFIG(ADDR_SURF_P2) |
3202 						 TILE_SPLIT(split_equal_to_row_size));
3203 				break;
3204 			case 5:
3205 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3206 						 PIPE_CONFIG(ADDR_SURF_P2) |
3207 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3208 				break;
3209 			case 6:
3210 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3211 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3212 						 PIPE_CONFIG(ADDR_SURF_P2) |
3213 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3214 				break;
3215 			case 7:
3216 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3217 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3218 						 PIPE_CONFIG(ADDR_SURF_P2) |
3219 						 TILE_SPLIT(split_equal_to_row_size));
3220 				break;
3221 			case 8:
3222 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3223 						 PIPE_CONFIG(ADDR_SURF_P2));
3224 				break;
3225 			case 9:
3226 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3227 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3228 						 PIPE_CONFIG(ADDR_SURF_P2));
3229 				break;
3230 			case 10:
3231 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3233 						 PIPE_CONFIG(ADDR_SURF_P2) |
3234 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235 				break;
3236 			case 11:
3237 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3239 						 PIPE_CONFIG(ADDR_SURF_P2) |
3240 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3241 				break;
3242 			case 12:
3243 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3244 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245 						 PIPE_CONFIG(ADDR_SURF_P2) |
3246 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247 				break;
3248 			case 13:
3249 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3250 						 PIPE_CONFIG(ADDR_SURF_P2) |
3251 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3252 				break;
3253 			case 14:
3254 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256 						 PIPE_CONFIG(ADDR_SURF_P2) |
3257 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258 				break;
3259 			case 16:
3260 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262 						 PIPE_CONFIG(ADDR_SURF_P2) |
3263 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3264 				break;
3265 			case 17:
3266 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3267 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3268 						 PIPE_CONFIG(ADDR_SURF_P2) |
3269 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270 				break;
3271 			case 27:
3272 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3273 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3274 						 PIPE_CONFIG(ADDR_SURF_P2));
3275 				break;
3276 			case 28:
3277 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3278 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3279 						 PIPE_CONFIG(ADDR_SURF_P2) |
3280 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3281 				break;
3282 			case 29:
3283 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3284 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3285 						 PIPE_CONFIG(ADDR_SURF_P2) |
3286 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287 				break;
3288 			case 30:
3289 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3290 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3291 						 PIPE_CONFIG(ADDR_SURF_P2) |
3292 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293 				break;
3294 			default:
3295 				gb_tile_moden = 0;
3296 				break;
3297 			}
3298 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3299 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3300 		}
3301 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3302 			switch (reg_offset) {
3303 			case 0:
3304 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 						 NUM_BANKS(ADDR_SURF_16_BANK));
3308 				break;
3309 			case 1:
3310 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3311 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3312 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313 						 NUM_BANKS(ADDR_SURF_16_BANK));
3314 				break;
3315 			case 2:
3316 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3318 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319 						 NUM_BANKS(ADDR_SURF_16_BANK));
3320 				break;
3321 			case 3:
3322 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3325 						 NUM_BANKS(ADDR_SURF_16_BANK));
3326 				break;
3327 			case 4:
3328 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331 						 NUM_BANKS(ADDR_SURF_16_BANK));
3332 				break;
3333 			case 5:
3334 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3335 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3336 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337 						 NUM_BANKS(ADDR_SURF_16_BANK));
3338 				break;
3339 			case 6:
3340 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343 						 NUM_BANKS(ADDR_SURF_8_BANK));
3344 				break;
3345 			case 8:
3346 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3347 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3348 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349 						 NUM_BANKS(ADDR_SURF_16_BANK));
3350 				break;
3351 			case 9:
3352 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3353 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3354 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355 						 NUM_BANKS(ADDR_SURF_16_BANK));
3356 				break;
3357 			case 10:
3358 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3359 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3360 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3361 						 NUM_BANKS(ADDR_SURF_16_BANK));
3362 				break;
3363 			case 11:
3364 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3365 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3366 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367 						 NUM_BANKS(ADDR_SURF_16_BANK));
3368 				break;
3369 			case 12:
3370 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3371 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3372 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3373 						 NUM_BANKS(ADDR_SURF_16_BANK));
3374 				break;
3375 			case 13:
3376 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379 						 NUM_BANKS(ADDR_SURF_16_BANK));
3380 				break;
3381 			case 14:
3382 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3384 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3385 						 NUM_BANKS(ADDR_SURF_8_BANK));
3386 				break;
3387 			default:
3388 				gb_tile_moden = 0;
3389 				break;
3390 			}
3391 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3392 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3393 		}
3394 	} else
3395 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3396 }
3397 
3398 /**
3399  * cik_select_se_sh - select which SE, SH to address
3400  *
3401  * @rdev: radeon_device pointer
3402  * @se_num: shader engine to address
3403  * @sh_num: sh block to address
3404  *
3405  * Select which SE, SH combinations to address. Certain
3406  * registers are instanced per SE or SH.  0xffffffff means
3407  * broadcast to all SEs or SHs (CIK).
3408  */
3409 static void cik_select_se_sh(struct radeon_device *rdev,
3410 			     u32 se_num, u32 sh_num)
3411 {
3412 	u32 data = INSTANCE_BROADCAST_WRITES;
3413 
3414 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3415 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3416 	else if (se_num == 0xffffffff)
3417 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3418 	else if (sh_num == 0xffffffff)
3419 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3420 	else
3421 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3422 	WREG32(GRBM_GFX_INDEX, data);
3423 }
3424 
3425 /**
3426  * cik_create_bitmask - create a bitmask
3427  *
3428  * @bit_width: length of the mask
3429  *
3430  * create a variable length bit mask (CIK).
3431  * Returns the bitmask.
3432  */
3433 static u32 cik_create_bitmask(u32 bit_width)
3434 {
3435 	u32 i, mask = 0;
3436 
3437 	for (i = 0; i < bit_width; i++) {
3438 		mask <<= 1;
3439 		mask |= 1;
3440 	}
3441 	return mask;
3442 }
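
/* Illustration (not part of the driver): cik_create_bitmask(4) returns 0xf,
 * the same value as (1U << 4) - 1.  The loop form also yields 0xffffffff
 * for a width of 32 without invoking the undefined behaviour of shifting a
 * 32-bit value by 32.
 */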
3443 
3444 /**
3445  * cik_get_rb_disabled - computes the mask of disabled RBs
3446  *
3447  * @rdev: radeon_device pointer
3448  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3449  * @sh_per_se: number of SH blocks per SE for the asic
3451  *
3452  * Calculates the bitmask of disabled RBs (CIK).
3453  * Returns the disabled RB bitmask.
3454  */
3455 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3456 			      u32 max_rb_num_per_se,
3457 			      u32 sh_per_se)
3458 {
3459 	u32 data, mask;
3460 
3461 	data = RREG32(CC_RB_BACKEND_DISABLE);
3462 	if (data & 1)
3463 		data &= BACKEND_DISABLE_MASK;
3464 	else
3465 		data = 0;
3466 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3467 
3468 	data >>= BACKEND_DISABLE_SHIFT;
3469 
3470 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3471 
3472 	return data & mask;
3473 }
3474 
3475 /**
3476  * cik_setup_rb - setup the RBs on the asic
3477  *
3478  * @rdev: radeon_device pointer
3479  * @se_num: number of SEs (shader engines) for the asic
3480  * @sh_per_se: number of SH blocks per SE for the asic
3481  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3482  *
3483  * Configures per-SE/SH RB registers (CIK).
3484  */
3485 static void cik_setup_rb(struct radeon_device *rdev,
3486 			 u32 se_num, u32 sh_per_se,
3487 			 u32 max_rb_num_per_se)
3488 {
3489 	int i, j;
3490 	u32 data, mask;
3491 	u32 disabled_rbs = 0;
3492 	u32 enabled_rbs = 0;
3493 
3494 	mutex_lock(&rdev->grbm_idx_mutex);
3495 	for (i = 0; i < se_num; i++) {
3496 		for (j = 0; j < sh_per_se; j++) {
3497 			cik_select_se_sh(rdev, i, j);
3498 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3499 			if (rdev->family == CHIP_HAWAII)
3500 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3501 			else
3502 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3503 		}
3504 	}
3505 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3506 	mutex_unlock(&rdev->grbm_idx_mutex);
3507 
3508 	mask = 1;
3509 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3510 		if (!(disabled_rbs & mask))
3511 			enabled_rbs |= mask;
3512 		mask <<= 1;
3513 	}
3514 
3515 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3516 
3517 	mutex_lock(&rdev->grbm_idx_mutex);
3518 	for (i = 0; i < se_num; i++) {
3519 		cik_select_se_sh(rdev, i, 0xffffffff);
3520 		data = 0;
3521 		for (j = 0; j < sh_per_se; j++) {
3522 			switch (enabled_rbs & 3) {
3523 			case 0:
3524 				if (j == 0)
3525 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3526 				else
3527 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3528 				break;
3529 			case 1:
3530 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3531 				break;
3532 			case 2:
3533 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3534 				break;
3535 			case 3:
3536 			default:
3537 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3538 				break;
3539 			}
3540 			enabled_rbs >>= 2;
3541 		}
3542 		WREG32(PA_SC_RASTER_CONFIG, data);
3543 	}
3544 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3545 	mutex_unlock(&rdev->grbm_idx_mutex);
3546 }
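
/* Worked example (illustrative numbers, assuming a 2-bit RB bitmap per SH
 * as on CIK): with se_num = 2, sh_per_se = 1 and max_rb_num_per_se = 2 the
 * first loop scans 4 RB slots.  If only RB 1 is reported disabled,
 * disabled_rbs = 0b0010 and the second loop derives enabled_rbs = 0b1101;
 * successive 2-bit groups of enabled_rbs then select the RASTER_CONFIG
 * mapping for each SE.
 */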
3547 
3548 /**
3549  * cik_gpu_init - setup the 3D engine
3550  *
3551  * @rdev: radeon_device pointer
3552  *
3553  * Configures the 3D engine and tiling configuration
3554  * registers so that the 3D engine is usable.
3555  */
3556 static void cik_gpu_init(struct radeon_device *rdev)
3557 {
3558 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3559 	u32 mc_shared_chmap, mc_arb_ramcfg;
3560 	u32 hdp_host_path_cntl;
3561 	u32 tmp;
3562 	int i, j;
3563 
3564 	switch (rdev->family) {
3565 	case CHIP_BONAIRE:
3566 		rdev->config.cik.max_shader_engines = 2;
3567 		rdev->config.cik.max_tile_pipes = 4;
3568 		rdev->config.cik.max_cu_per_sh = 7;
3569 		rdev->config.cik.max_sh_per_se = 1;
3570 		rdev->config.cik.max_backends_per_se = 2;
3571 		rdev->config.cik.max_texture_channel_caches = 4;
3572 		rdev->config.cik.max_gprs = 256;
3573 		rdev->config.cik.max_gs_threads = 32;
3574 		rdev->config.cik.max_hw_contexts = 8;
3575 
3576 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3577 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3578 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3579 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3580 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3581 		break;
3582 	case CHIP_HAWAII:
3583 		rdev->config.cik.max_shader_engines = 4;
3584 		rdev->config.cik.max_tile_pipes = 16;
3585 		rdev->config.cik.max_cu_per_sh = 11;
3586 		rdev->config.cik.max_sh_per_se = 1;
3587 		rdev->config.cik.max_backends_per_se = 4;
3588 		rdev->config.cik.max_texture_channel_caches = 16;
3589 		rdev->config.cik.max_gprs = 256;
3590 		rdev->config.cik.max_gs_threads = 32;
3591 		rdev->config.cik.max_hw_contexts = 8;
3592 
3593 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3594 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3595 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3596 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3597 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3598 		break;
3599 	case CHIP_KAVERI:
3600 		rdev->config.cik.max_shader_engines = 1;
3601 		rdev->config.cik.max_tile_pipes = 4;
3602 		rdev->config.cik.max_cu_per_sh = 8;
3603 		rdev->config.cik.max_backends_per_se = 2;
3604 		rdev->config.cik.max_sh_per_se = 1;
3605 		rdev->config.cik.max_texture_channel_caches = 4;
3606 		rdev->config.cik.max_gprs = 256;
3607 		rdev->config.cik.max_gs_threads = 16;
3608 		rdev->config.cik.max_hw_contexts = 8;
3609 
3610 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3611 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3612 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3613 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3614 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3615 		break;
3616 	case CHIP_KABINI:
3617 	case CHIP_MULLINS:
3618 	default:
3619 		rdev->config.cik.max_shader_engines = 1;
3620 		rdev->config.cik.max_tile_pipes = 2;
3621 		rdev->config.cik.max_cu_per_sh = 2;
3622 		rdev->config.cik.max_sh_per_se = 1;
3623 		rdev->config.cik.max_backends_per_se = 1;
3624 		rdev->config.cik.max_texture_channel_caches = 2;
3625 		rdev->config.cik.max_gprs = 256;
3626 		rdev->config.cik.max_gs_threads = 16;
3627 		rdev->config.cik.max_hw_contexts = 8;
3628 
3629 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3630 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3631 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3632 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3633 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3634 		break;
3635 	}
3636 
3637 	/* Initialize HDP */
3638 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3639 		WREG32((0x2c14 + j), 0x00000000);
3640 		WREG32((0x2c18 + j), 0x00000000);
3641 		WREG32((0x2c1c + j), 0x00000000);
3642 		WREG32((0x2c20 + j), 0x00000000);
3643 		WREG32((0x2c24 + j), 0x00000000);
3644 	}
3645 
3646 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3647 	WREG32(SRBM_INT_CNTL, 0x1);
3648 	WREG32(SRBM_INT_ACK, 0x1);
3649 
3650 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3651 
3652 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3653 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3654 
3655 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3656 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3657 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3658 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3659 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3660 		rdev->config.cik.mem_row_size_in_kb = 4;
3661 	/* XXX use MC settings? */
3662 	rdev->config.cik.shader_engine_tile_size = 32;
3663 	rdev->config.cik.num_gpus = 1;
3664 	rdev->config.cik.multi_gpu_tile_size = 64;
3665 
3666 	/* fix up row size */
3667 	gb_addr_config &= ~ROW_SIZE_MASK;
3668 	switch (rdev->config.cik.mem_row_size_in_kb) {
3669 	case 1:
3670 	default:
3671 		gb_addr_config |= ROW_SIZE(0);
3672 		break;
3673 	case 2:
3674 		gb_addr_config |= ROW_SIZE(1);
3675 		break;
3676 	case 4:
3677 		gb_addr_config |= ROW_SIZE(2);
3678 		break;
3679 	}
3680 
3681 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3682 	 * not have bank info, so create a custom tiling dword.
3683 	 * bits 3:0   num_pipes
3684 	 * bits 7:4   num_banks
3685 	 * bits 11:8  group_size
3686 	 * bits 15:12 row_size
3687 	 */
3688 	rdev->config.cik.tile_config = 0;
3689 	switch (rdev->config.cik.num_tile_pipes) {
3690 	case 1:
3691 		rdev->config.cik.tile_config |= (0 << 0);
3692 		break;
3693 	case 2:
3694 		rdev->config.cik.tile_config |= (1 << 0);
3695 		break;
3696 	case 4:
3697 		rdev->config.cik.tile_config |= (2 << 0);
3698 		break;
3699 	case 8:
3700 	default:
3701 		/* XXX what about 12? */
3702 		rdev->config.cik.tile_config |= (3 << 0);
3703 		break;
3704 	}
3705 	rdev->config.cik.tile_config |=
3706 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3707 	rdev->config.cik.tile_config |=
3708 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3709 	rdev->config.cik.tile_config |=
3710 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3711 
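	/* Worked example (hypothetical field values, illustration only):
	 * with 4 tile pipes the switch above stores 2 in bits 3:0; if the
	 * MC reports a bank field of 1 and gb_addr_config holds an
	 * interleave field of 0 and a row size field of 2, then
	 * tile_config = 2 | (1 << 4) | (0 << 8) | (2 << 12) = 0x2012.
	 */
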
3712 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3713 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3714 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3715 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3716 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3717 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3718 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3719 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3720 
3721 	cik_tiling_mode_table_init(rdev);
3722 
3723 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3724 		     rdev->config.cik.max_sh_per_se,
3725 		     rdev->config.cik.max_backends_per_se);
3726 
3727 	rdev->config.cik.active_cus = 0;
3728 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3729 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3730 			rdev->config.cik.active_cus +=
3731 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3732 		}
3733 	}
3734 
3735 	/* set HW defaults for 3D engine */
3736 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3737 
3738 	mutex_lock(&rdev->grbm_idx_mutex);
3739 	/*
3740 	 * make sure that the following register writes are broadcast
3741 	 * to all the shaders
3742 	 */
3743 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3744 	WREG32(SX_DEBUG_1, 0x20);
3745 
3746 	WREG32(TA_CNTL_AUX, 0x00010000);
3747 
3748 	tmp = RREG32(SPI_CONFIG_CNTL);
3749 	tmp |= 0x03000000;
3750 	WREG32(SPI_CONFIG_CNTL, tmp);
3751 
3752 	WREG32(SQ_CONFIG, 1);
3753 
3754 	WREG32(DB_DEBUG, 0);
3755 
3756 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3757 	tmp |= 0x00000400;
3758 	WREG32(DB_DEBUG2, tmp);
3759 
3760 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3761 	tmp |= 0x00020200;
3762 	WREG32(DB_DEBUG3, tmp);
3763 
3764 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3765 	tmp |= 0x00018208;
3766 	WREG32(CB_HW_CONTROL, tmp);
3767 
3768 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3769 
3770 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3771 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3772 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3773 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3774 
3775 	WREG32(VGT_NUM_INSTANCES, 1);
3776 
3777 	WREG32(CP_PERFMON_CNTL, 0);
3778 
3779 	WREG32(SQ_CONFIG, 0);
3780 
3781 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3782 					  FORCE_EOV_MAX_REZ_CNT(255)));
3783 
3784 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3785 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3786 
3787 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3788 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3789 
3790 	tmp = RREG32(HDP_MISC_CNTL);
3791 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3792 	WREG32(HDP_MISC_CNTL, tmp);
3793 
3794 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3795 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3796 
3797 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3798 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3799 	mutex_unlock(&rdev->grbm_idx_mutex);
3800 
3801 	udelay(50);
3802 }
3803 
3804 /*
3805  * GPU scratch register helper functions.
3806  */
3807 /**
3808  * cik_scratch_init - setup driver info for CP scratch regs
3809  *
3810  * @rdev: radeon_device pointer
3811  *
3812  * Set up the number and offset of the CP scratch registers.
3813  * NOTE: use of CP scratch registers is a legacy interface and
3814  * is not used by default on newer asics (r6xx+).  On newer asics,
3815  * memory buffers are used for fences rather than scratch regs.
3816  */
3817 static void cik_scratch_init(struct radeon_device *rdev)
3818 {
3819 	int i;
3820 
3821 	rdev->scratch.num_reg = 7;
3822 	rdev->scratch.reg_base = SCRATCH_REG0;
3823 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3824 		rdev->scratch.free[i] = true;
3825 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3826 	}
3827 }
3828 
3829 /**
3830  * cik_ring_test - basic gfx ring test
3831  *
3832  * @rdev: radeon_device pointer
3833  * @ring: radeon_ring structure holding ring information
3834  *
3835  * Allocate a scratch register and write to it using the gfx ring (CIK).
3836  * Provides a basic gfx ring test to verify that the ring is working.
3837  * Used by cik_cp_gfx_resume().
3838  * Returns 0 on success, error on failure.
3839  */
3840 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3841 {
3842 	uint32_t scratch;
3843 	uint32_t tmp = 0;
3844 	unsigned i;
3845 	int r;
3846 
3847 	r = radeon_scratch_get(rdev, &scratch);
3848 	if (r) {
3849 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3850 		return r;
3851 	}
3852 	WREG32(scratch, 0xCAFEDEAD);
3853 	r = radeon_ring_lock(rdev, ring, 3);
3854 	if (r) {
3855 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3856 		radeon_scratch_free(rdev, scratch);
3857 		return r;
3858 	}
3859 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3860 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3861 	radeon_ring_write(ring, 0xDEADBEEF);
3862 	radeon_ring_unlock_commit(rdev, ring, false);
3863 
3864 	for (i = 0; i < rdev->usec_timeout; i++) {
3865 		tmp = RREG32(scratch);
3866 		if (tmp == 0xDEADBEEF)
3867 			break;
3868 		DRM_UDELAY(1);
3869 	}
3870 	if (i < rdev->usec_timeout) {
3871 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3872 	} else {
3873 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3874 			  ring->idx, scratch, tmp);
3875 		r = -EINVAL;
3876 	}
3877 	radeon_scratch_free(rdev, scratch);
3878 	return r;
3879 }
3880 
3881 /**
3882  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3883  *
3884  * @rdev: radeon_device pointer
3885  * @ridx: radeon ring index
3886  *
3887  * Emits an hdp flush on the cp.
3888  */
3889 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3890 				       int ridx)
3891 {
3892 	struct radeon_ring *ring = &rdev->ring[ridx];
3893 	u32 ref_and_mask;
3894 
3895 	switch (ring->idx) {
3896 	case CAYMAN_RING_TYPE_CP1_INDEX:
3897 	case CAYMAN_RING_TYPE_CP2_INDEX:
3898 	default:
3899 		switch (ring->me) {
3900 		case 0:
3901 			ref_and_mask = CP2 << ring->pipe;
3902 			break;
3903 		case 1:
3904 			ref_and_mask = CP6 << ring->pipe;
3905 			break;
3906 		default:
3907 			return;
3908 		}
3909 		break;
3910 	case RADEON_RING_TYPE_GFX_INDEX:
3911 		ref_and_mask = CP0;
3912 		break;
3913 	}
3914 
3915 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3916 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3917 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3918 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3919 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3920 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3921 	radeon_ring_write(ring, ref_and_mask);
3922 	radeon_ring_write(ring, ref_and_mask);
3923 	radeon_ring_write(ring, 0x20); /* poll interval */
3924 }
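
/* Packet semantics sketch (summarizing the fields selected above, per the
 * in-line comments, not a full PM4 reference): the CP writes ref_and_mask
 * to GPU_HDP_FLUSH_REQ, then polls GPU_HDP_FLUSH_DONE at the programmed
 * interval (0x20) until the masked value equals ref_and_mask -- the
 * "write, wait, write" operation with an equality compare, executed by
 * the PFP.
 */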
3925 
3926 /**
3927  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3928  *
3929  * @rdev: radeon_device pointer
3930  * @fence: radeon fence object
3931  *
3932  * Emits a fence sequence number on the gfx ring and flushes
3933  * GPU caches.
3934  */
3935 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3936 			     struct radeon_fence *fence)
3937 {
3938 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3939 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3940 
3941 	/* Workaround for cache flush problems.  First send a dummy EOP
3942 	 * event down the pipe with a sequence number one below the real one.
3943 	 */
3944 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3945 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3946 				 EOP_TC_ACTION_EN |
3947 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3948 				 EVENT_INDEX(5)));
3949 	radeon_ring_write(ring, addr & 0xfffffffc);
3950 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3951 				DATA_SEL(1) | INT_SEL(0));
3952 	radeon_ring_write(ring, fence->seq - 1);
3953 	radeon_ring_write(ring, 0);
3954 
3955 	/* Then send the real EOP event down the pipe. */
3956 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3957 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3958 				 EOP_TC_ACTION_EN |
3959 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3960 				 EVENT_INDEX(5)));
3961 	radeon_ring_write(ring, addr & 0xfffffffc);
3962 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3963 	radeon_ring_write(ring, fence->seq);
3964 	radeon_ring_write(ring, 0);
3965 }
3966 
3967 /**
3968  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3969  *
3970  * @rdev: radeon_device pointer
3971  * @fence: radeon fence object
3972  *
3973  * Emits a fence sequence number on the compute ring and flushes
3974  * GPU caches.
3975  */
3976 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3977 				 struct radeon_fence *fence)
3978 {
3979 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3980 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3981 
3982 	/* RELEASE_MEM - flush caches, send int */
3983 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3984 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3985 				 EOP_TC_ACTION_EN |
3986 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3987 				 EVENT_INDEX(5)));
3988 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3989 	radeon_ring_write(ring, addr & 0xfffffffc);
3990 	radeon_ring_write(ring, upper_32_bits(addr));
3991 	radeon_ring_write(ring, fence->seq);
3992 	radeon_ring_write(ring, 0);
3993 }
3994 
3995 /**
3996  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3997  *
3998  * @rdev: radeon_device pointer
3999  * @ring: radeon ring buffer object
4000  * @semaphore: radeon semaphore object
4001  * @emit_wait: Is this a semaphore wait?
4002  *
4003  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4004  * from running ahead of semaphore waits.
4005  */
4006 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4007 			     struct radeon_ring *ring,
4008 			     struct radeon_semaphore *semaphore,
4009 			     bool emit_wait)
4010 {
4011 	uint64_t addr = semaphore->gpu_addr;
4012 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4013 
4014 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4015 	radeon_ring_write(ring, lower_32_bits(addr));
4016 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4017 
4018 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4019 		/* Prevent the PFP from running ahead of the semaphore wait */
4020 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4021 		radeon_ring_write(ring, 0x0);
4022 	}
4023 
4024 	return true;
4025 }
4026 
4027 /**
4028  * cik_copy_cpdma - copy pages using the CP DMA engine
4029  *
4030  * @rdev: radeon_device pointer
4031  * @src_offset: src GPU address
4032  * @dst_offset: dst GPU address
4033  * @num_gpu_pages: number of GPU pages to xfer
4034  * @resv: reservation object to sync to
4035  *
4036  * Copy GPU pages using the CP DMA engine (CIK+).
4037  * Used by the radeon ttm implementation to move pages if
4038  * registered as the asic copy callback.
4039  */
4040 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4041 				    uint64_t src_offset, uint64_t dst_offset,
4042 				    unsigned num_gpu_pages,
4043 				    struct reservation_object *resv)
4044 {
4045 	struct radeon_fence *fence;
4046 	struct radeon_sync sync;
4047 	int ring_index = rdev->asic->copy.blit_ring_index;
4048 	struct radeon_ring *ring = &rdev->ring[ring_index];
4049 	u32 size_in_bytes, cur_size_in_bytes, control;
4050 	int i, num_loops;
4051 	int r = 0;
4052 
4053 	radeon_sync_create(&sync);
4054 
4055 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4056 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4057 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4058 	if (r) {
4059 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4060 		radeon_sync_free(rdev, &sync, NULL);
4061 		return ERR_PTR(r);
4062 	}
4063 
4064 	radeon_sync_resv(rdev, &sync, resv, false);
4065 	radeon_sync_rings(rdev, &sync, ring->idx);
4066 
4067 	for (i = 0; i < num_loops; i++) {
4068 		cur_size_in_bytes = size_in_bytes;
4069 		if (cur_size_in_bytes > 0x1fffff)
4070 			cur_size_in_bytes = 0x1fffff;
4071 		size_in_bytes -= cur_size_in_bytes;
4072 		control = 0;
4073 		if (size_in_bytes == 0)
4074 			control |= PACKET3_DMA_DATA_CP_SYNC;
4075 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4076 		radeon_ring_write(ring, control);
4077 		radeon_ring_write(ring, lower_32_bits(src_offset));
4078 		radeon_ring_write(ring, upper_32_bits(src_offset));
4079 		radeon_ring_write(ring, lower_32_bits(dst_offset));
4080 		radeon_ring_write(ring, upper_32_bits(dst_offset));
4081 		radeon_ring_write(ring, cur_size_in_bytes);
4082 		src_offset += cur_size_in_bytes;
4083 		dst_offset += cur_size_in_bytes;
4084 	}
4085 
4086 	r = radeon_fence_emit(rdev, &fence, ring->idx);
4087 	if (r) {
4088 		radeon_ring_unlock_undo(rdev, ring);
4089 		radeon_sync_free(rdev, &sync, NULL);
4090 		return ERR_PTR(r);
4091 	}
4092 
4093 	radeon_ring_unlock_commit(rdev, ring, false);
4094 	radeon_sync_free(rdev, &sync, fence);
4095 
4096 	return fence;
4097 }
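
/* Sizing sketch (derived from the code above, not normative): each
 * DMA_DATA packet moves at most 0x1fffff bytes, so copying 256 MiB
 * (0x10000000 bytes) takes DIV_ROUND_UP(0x10000000, 0x1fffff) = 129
 * packets, and the ring is locked for 129 * 7 + 18 dwords up front.
 */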
4098 
4099 /*
4100  * IB stuff
4101  */
4102 /**
4103  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4104  *
4105  * @rdev: radeon_device pointer
4106  * @ib: radeon indirect buffer object
4107  *
4108  * Emits a DE (drawing engine) or CE (constant engine) IB
4109  * on the gfx ring.  IBs are usually generated by userspace
4110  * acceleration drivers and submitted to the kernel for
4111  * scheduling on the ring.  This function schedules the IB
4112  * on the gfx ring for execution by the GPU.
4113  */
4114 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4115 {
4116 	struct radeon_ring *ring = &rdev->ring[ib->ring];
4117 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4118 	u32 header, control = INDIRECT_BUFFER_VALID;
4119 
4120 	if (ib->is_const_ib) {
4121 		/* set switch buffer packet before const IB */
4122 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4123 		radeon_ring_write(ring, 0);
4124 
4125 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4126 	} else {
4127 		u32 next_rptr;
4128 		if (ring->rptr_save_reg) {
4129 			next_rptr = ring->wptr + 3 + 4;
4130 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4131 			radeon_ring_write(ring, ((ring->rptr_save_reg -
4132 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4133 			radeon_ring_write(ring, next_rptr);
4134 		} else if (rdev->wb.enabled) {
4135 			next_rptr = ring->wptr + 5 + 4;
4136 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4137 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4138 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4139 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4140 			radeon_ring_write(ring, next_rptr);
4141 		}
4142 
4143 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4144 	}
4145 
4146 	control |= ib->length_dw | (vm_id << 24);
4147 
4148 	radeon_ring_write(ring, header);
4149 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
4150 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4151 	radeon_ring_write(ring, control);
4152 }
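
/* Layout recap (illustrative values): for a 64-dword IB mapped in VM id 3,
 * the final four writes above emit
 *   PACKET3(PACKET3_INDIRECT_BUFFER, 2)
 *   gpu_addr & 0xFFFFFFFC             (low bits, 4-byte aligned)
 *   upper_32_bits(gpu_addr) & 0xFFFF  (high 16 bits)
 *   INDIRECT_BUFFER_VALID | 64 | (3 << 24)
 */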
4153 
4154 /**
4155  * cik_ib_test - basic gfx ring IB test
4156  *
4157  * @rdev: radeon_device pointer
4158  * @ring: radeon_ring structure holding ring information
4159  *
4160  * Allocate an IB and execute it on the gfx ring (CIK).
4161  * Provides a basic gfx ring test to verify that IBs are working.
4162  * Returns 0 on success, error on failure.
4163  */
4164 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4165 {
4166 	struct radeon_ib ib;
4167 	uint32_t scratch;
4168 	uint32_t tmp = 0;
4169 	unsigned i;
4170 	int r;
4171 
4172 	r = radeon_scratch_get(rdev, &scratch);
4173 	if (r) {
4174 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4175 		return r;
4176 	}
4177 	WREG32(scratch, 0xCAFEDEAD);
4178 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4179 	if (r) {
4180 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4181 		radeon_scratch_free(rdev, scratch);
4182 		return r;
4183 	}
4184 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4185 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4186 	ib.ptr[2] = 0xDEADBEEF;
4187 	ib.length_dw = 3;
4188 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4189 	if (r) {
4190 		radeon_scratch_free(rdev, scratch);
4191 		radeon_ib_free(rdev, &ib);
4192 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4193 		return r;
4194 	}
4195 	r = radeon_fence_wait(ib.fence, false);
4196 	if (r) {
4197 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4198 		radeon_scratch_free(rdev, scratch);
4199 		radeon_ib_free(rdev, &ib);
4200 		return r;
4201 	}
4202 	for (i = 0; i < rdev->usec_timeout; i++) {
4203 		tmp = RREG32(scratch);
4204 		if (tmp == 0xDEADBEEF)
4205 			break;
4206 		DRM_UDELAY(1);
4207 	}
4208 	if (i < rdev->usec_timeout) {
4209 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4210 	} else {
4211 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4212 			  scratch, tmp);
4213 		r = -EINVAL;
4214 	}
4215 	radeon_scratch_free(rdev, scratch);
4216 	radeon_ib_free(rdev, &ib);
4217 	return r;
4218 }
4219 
4220 /*
4221  * CP.
4222  * On CIK, gfx and compute now have independent command processors.
4223  *
4224  * GFX
4225  * Gfx consists of a single ring and can process both gfx jobs and
4226  * compute jobs.  The gfx CP consists of three microengines (ME):
4227  * PFP - Pre-Fetch Parser
4228  * ME - Micro Engine
4229  * CE - Constant Engine
4230  * The PFP and ME make up what is considered the Drawing Engine (DE).
4231  * The CE is an asynchronous engine used for updating buffer descriptors
4232  * used by the DE so that they can be loaded into cache in parallel
4233  * while the DE is processing state update packets.
4234  *
4235  * Compute
4236  * The compute CP consists of two microengines (ME):
4237  * MEC1 - Compute MicroEngine 1
4238  * MEC2 - Compute MicroEngine 2
4239  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4240  * The queues are exposed to userspace and are programmed directly
4241  * by the compute runtime.
4242  */
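
/* Queue-count arithmetic implied by the layout above: 2 MECs x 4 pipes x
 * 8 queues gives 64 compute queues on KV, and 32 where only one MEC is
 * present (CI/KB).  This driver programs just two of them; see
 * cik_cp_compute_resume() below.
 */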
4243 /**
4244  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4245  *
4246  * @rdev: radeon_device pointer
4247  * @enable: enable or disable the MEs
4248  *
4249  * Halts or unhalts the gfx MEs.
4250  */
4251 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4252 {
4253 	if (enable)
4254 		WREG32(CP_ME_CNTL, 0);
4255 	else {
4256 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4257 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4258 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4259 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4260 	}
4261 	udelay(50);
4262 }
4263 
4264 /**
4265  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4266  *
4267  * @rdev: radeon_device pointer
4268  *
4269  * Loads the gfx PFP, ME, and CE ucode.
4270  * Returns 0 for success, -EINVAL if the ucode is not available.
4271  */
4272 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4273 {
4274 	int i;
4275 
4276 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4277 		return -EINVAL;
4278 
4279 	cik_cp_gfx_enable(rdev, false);
4280 
4281 	if (rdev->new_fw) {
4282 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4283 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4284 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4285 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4286 		const struct gfx_firmware_header_v1_0 *me_hdr =
4287 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4288 		const __le32 *fw_data;
4289 		u32 fw_size;
4290 
4291 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4292 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4293 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4294 
4295 		/* PFP */
4296 		fw_data = (const __le32 *)
4297 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4298 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4299 		WREG32(CP_PFP_UCODE_ADDR, 0);
4300 		for (i = 0; i < fw_size; i++)
4301 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4302 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4303 
4304 		/* CE */
4305 		fw_data = (const __le32 *)
4306 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4307 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4308 		WREG32(CP_CE_UCODE_ADDR, 0);
4309 		for (i = 0; i < fw_size; i++)
4310 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4311 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4312 
4313 		/* ME */
4314 		fw_data = (const __le32 *)
4315 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4316 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4317 		WREG32(CP_ME_RAM_WADDR, 0);
4318 		for (i = 0; i < fw_size; i++)
4319 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4320 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4321 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4322 	} else {
4323 		const __be32 *fw_data;
4324 
4325 		/* PFP */
4326 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4327 		WREG32(CP_PFP_UCODE_ADDR, 0);
4328 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4329 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4330 		WREG32(CP_PFP_UCODE_ADDR, 0);
4331 
4332 		/* CE */
4333 		fw_data = (const __be32 *)rdev->ce_fw->data;
4334 		WREG32(CP_CE_UCODE_ADDR, 0);
4335 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4336 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4337 		WREG32(CP_CE_UCODE_ADDR, 0);
4338 
4339 		/* ME */
4340 		fw_data = (const __be32 *)rdev->me_fw->data;
4341 		WREG32(CP_ME_RAM_WADDR, 0);
4342 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4343 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4344 		WREG32(CP_ME_RAM_WADDR, 0);
4345 	}
4346 
4347 	return 0;
4348 }
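
/* Upload pattern used by both paths above: reset the UCODE_ADDR register
 * to 0, stream every dword through the matching UCODE_DATA register, then
 * write UCODE_ADDR once more -- 0 for the legacy blobs, the ucode version
 * for the new-style headers.  The ME block uses the CP_ME_RAM_WADDR/RADDR
 * pair instead of a single address register.
 */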
4349 
4350 /**
4351  * cik_cp_gfx_start - start the gfx ring
4352  *
4353  * @rdev: radeon_device pointer
4354  *
4355  * Enables the ring and loads the clear state context and other
4356  * packets required to init the ring.
4357  * Returns 0 for success, error for failure.
4358  */
4359 static int cik_cp_gfx_start(struct radeon_device *rdev)
4360 {
4361 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4362 	int r, i;
4363 
4364 	/* init the CP */
4365 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4366 	WREG32(CP_ENDIAN_SWAP, 0);
4367 	WREG32(CP_DEVICE_ID, 1);
4368 
4369 	cik_cp_gfx_enable(rdev, true);
4370 
4371 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4372 	if (r) {
4373 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4374 		return r;
4375 	}
4376 
4377 	/* init the CE partitions.  CE only used for gfx on CIK */
4378 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4379 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4380 	radeon_ring_write(ring, 0x8000);
4381 	radeon_ring_write(ring, 0x8000);
4382 
4383 	/* setup clear context state */
4384 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4385 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4386 
4387 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4388 	radeon_ring_write(ring, 0x80000000);
4389 	radeon_ring_write(ring, 0x80000000);
4390 
4391 	for (i = 0; i < cik_default_size; i++)
4392 		radeon_ring_write(ring, cik_default_state[i]);
4393 
4394 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4395 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4396 
4397 	/* set clear context state */
4398 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4399 	radeon_ring_write(ring, 0);
4400 
4401 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4402 	radeon_ring_write(ring, 0x00000316);
4403 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4404 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4405 
4406 	radeon_ring_unlock_commit(rdev, ring, false);
4407 
4408 	return 0;
4409 }
4410 
4411 /**
4412  * cik_cp_gfx_fini - stop the gfx ring
4413  *
4414  * @rdev: radeon_device pointer
4415  *
4416  * Stop the gfx ring and tear down the driver ring
4417  * info.
4418  */
4419 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4420 {
4421 	cik_cp_gfx_enable(rdev, false);
4422 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4423 }
4424 
4425 /**
4426  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4427  *
4428  * @rdev: radeon_device pointer
4429  *
4430  * Program the location and size of the gfx ring buffer
4431  * and test it to make sure it's working.
4432  * Returns 0 for success, error for failure.
4433  */
4434 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4435 {
4436 	struct radeon_ring *ring;
4437 	u32 tmp;
4438 	u32 rb_bufsz;
4439 	u64 rb_addr;
4440 	int r;
4441 
4442 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4443 	if (rdev->family != CHIP_HAWAII)
4444 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4445 
4446 	/* Set the write pointer delay */
4447 	WREG32(CP_RB_WPTR_DELAY, 0);
4448 
4449 	/* set the RB to use vmid 0 */
4450 	WREG32(CP_RB_VMID, 0);
4451 
4452 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4453 
4454 	/* ring 0 - compute and gfx */
4455 	/* Set ring buffer size */
4456 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4457 	rb_bufsz = order_base_2(ring->ring_size / 8);
4458 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
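	/* Example (illustrative only): a 1 MiB ring gives
	 * order_base_2(1048576 / 8) = 17 in the low bits, and with 4 KiB
	 * pages the RADEON_GPU_PAGE_SIZE/8 term contributes
	 * order_base_2(512) = 9 in bits 15:8, so tmp = (9 << 8) | 17 = 0x911.
	 */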
4459 #ifdef __BIG_ENDIAN
4460 	tmp |= BUF_SWAP_32BIT;
4461 #endif
4462 	WREG32(CP_RB0_CNTL, tmp);
4463 
4464 	/* Initialize the ring buffer's read and write pointers */
4465 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4466 	ring->wptr = 0;
4467 	WREG32(CP_RB0_WPTR, ring->wptr);
4468 
4469 	/* set the wb address whether it's enabled or not */
4470 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4471 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4472 
4473 	/* scratch register shadowing is no longer supported */
4474 	WREG32(SCRATCH_UMSK, 0);
4475 
4476 	if (!rdev->wb.enabled)
4477 		tmp |= RB_NO_UPDATE;
4478 
4479 	mdelay(1);
4480 	WREG32(CP_RB0_CNTL, tmp);
4481 
4482 	rb_addr = ring->gpu_addr >> 8;
4483 	WREG32(CP_RB0_BASE, rb_addr);
4484 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4485 
4486 	/* start the ring */
4487 	cik_cp_gfx_start(rdev);
4488 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4489 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4490 	if (r) {
4491 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4492 		return r;
4493 	}
4494 
4495 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4496 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4497 
4498 	return 0;
4499 }
4500 
4501 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4502 		     struct radeon_ring *ring)
4503 {
4504 	u32 rptr;
4505 
4506 	if (rdev->wb.enabled)
4507 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4508 	else
4509 		rptr = RREG32(CP_RB0_RPTR);
4510 
4511 	return rptr;
4512 }
4513 
4514 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4515 		     struct radeon_ring *ring)
4516 {
4517 	u32 wptr;
4518 
4519 	wptr = RREG32(CP_RB0_WPTR);
4520 
4521 	return wptr;
4522 }
4523 
4524 void cik_gfx_set_wptr(struct radeon_device *rdev,
4525 		      struct radeon_ring *ring)
4526 {
4527 	WREG32(CP_RB0_WPTR, ring->wptr);
4528 	(void)RREG32(CP_RB0_WPTR); /* read back to flush the posted write */
4529 }
4530 
4531 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4532 			 struct radeon_ring *ring)
4533 {
4534 	u32 rptr;
4535 
4536 	if (rdev->wb.enabled) {
4537 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4538 	} else {
4539 		mutex_lock(&rdev->srbm_mutex);
4540 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4541 		rptr = RREG32(CP_HQD_PQ_RPTR);
4542 		cik_srbm_select(rdev, 0, 0, 0, 0);
4543 		mutex_unlock(&rdev->srbm_mutex);
4544 	}
4545 
4546 	return rptr;
4547 }
4548 
4549 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4550 			 struct radeon_ring *ring)
4551 {
4552 	u32 wptr;
4553 
4554 	if (rdev->wb.enabled) {
4555 		/* XXX check if swapping is necessary on BE */
4556 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4557 	} else {
4558 		mutex_lock(&rdev->srbm_mutex);
4559 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4560 		wptr = RREG32(CP_HQD_PQ_WPTR);
4561 		cik_srbm_select(rdev, 0, 0, 0, 0);
4562 		mutex_unlock(&rdev->srbm_mutex);
4563 	}
4564 
4565 	return wptr;
4566 }
4567 
4568 void cik_compute_set_wptr(struct radeon_device *rdev,
4569 			  struct radeon_ring *ring)
4570 {
4571 	/* XXX check if swapping is necessary on BE */
4572 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4573 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4574 }
4575 
4576 static void cik_compute_stop(struct radeon_device *rdev,
4577 			     struct radeon_ring *ring)
4578 {
4579 	u32 j, tmp;
4580 
4581 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4582 	/* Disable wptr polling. */
4583 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4584 	tmp &= ~WPTR_POLL_EN;
4585 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4586 	/* Disable HQD. */
4587 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4588 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4589 		for (j = 0; j < rdev->usec_timeout; j++) {
4590 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4591 				break;
4592 			udelay(1);
4593 		}
4594 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4595 		WREG32(CP_HQD_PQ_RPTR, 0);
4596 		WREG32(CP_HQD_PQ_WPTR, 0);
4597 	}
4598 	cik_srbm_select(rdev, 0, 0, 0, 0);
4599 }
4600 
4601 /**
4602  * cik_cp_compute_enable - enable/disable the compute CP MEs
4603  *
4604  * @rdev: radeon_device pointer
4605  * @enable: enable or disable the MEs
4606  *
4607  * Halts or unhalts the compute MEs.
4608  */
4609 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4610 {
4611 	if (enable)
4612 		WREG32(CP_MEC_CNTL, 0);
4613 	else {
4614 		/*
4615 		 * To make hibernation reliable we need to clear compute ring
4616 		 * configuration before halting the compute ring.
4617 		 */
4618 		mutex_lock(&rdev->srbm_mutex);
4619 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4620 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4621 		mutex_unlock(&rdev->srbm_mutex);
4622 
4623 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4624 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4625 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4626 	}
4627 	udelay(50);
4628 }
4629 
4630 /**
4631  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4632  *
4633  * @rdev: radeon_device pointer
4634  *
4635  * Loads the compute MEC1&2 ucode.
4636  * Returns 0 for success, -EINVAL if the ucode is not available.
4637  */
4638 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4639 {
4640 	int i;
4641 
4642 	if (!rdev->mec_fw)
4643 		return -EINVAL;
4644 
4645 	cik_cp_compute_enable(rdev, false);
4646 
4647 	if (rdev->new_fw) {
4648 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4649 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4650 		const __le32 *fw_data;
4651 		u32 fw_size;
4652 
4653 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4654 
4655 		/* MEC1 */
4656 		fw_data = (const __le32 *)
4657 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4658 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4659 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4660 		for (i = 0; i < fw_size; i++)
4661 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4662 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4663 
4664 		/* MEC2 */
4665 		if (rdev->family == CHIP_KAVERI) {
4666 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4667 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4668 
4669 			fw_data = (const __le32 *)
4670 				(rdev->mec2_fw->data +
4671 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4672 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4673 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4674 			for (i = 0; i < fw_size; i++)
4675 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4676 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4677 		}
4678 	} else {
4679 		const __be32 *fw_data;
4680 
4681 		/* MEC1 */
4682 		fw_data = (const __be32 *)rdev->mec_fw->data;
4683 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4684 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4685 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4686 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4687 
4688 		if (rdev->family == CHIP_KAVERI) {
4689 			/* MEC2 */
4690 			fw_data = (const __be32 *)rdev->mec_fw->data;
4691 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4692 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4693 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4694 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4695 		}
4696 	}
4697 
4698 	return 0;
4699 }
4700 
4701 /**
4702  * cik_cp_compute_start - start the compute queues
4703  *
4704  * @rdev: radeon_device pointer
4705  *
4706  * Enable the compute queues.
4707  * Returns 0 for success, error for failure.
4708  */
4709 static int cik_cp_compute_start(struct radeon_device *rdev)
4710 {
4711 	cik_cp_compute_enable(rdev, true);
4712 
4713 	return 0;
4714 }
4715 
4716 /**
4717  * cik_cp_compute_fini - stop the compute queues
4718  *
4719  * @rdev: radeon_device pointer
4720  *
4721  * Stop the compute queues and tear down the driver queue
4722  * info.
4723  */
4724 static void cik_cp_compute_fini(struct radeon_device *rdev)
4725 {
4726 	int i, idx, r;
4727 
4728 	cik_cp_compute_enable(rdev, false);
4729 
4730 	for (i = 0; i < 2; i++) {
4731 		if (i == 0)
4732 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4733 		else
4734 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4735 
4736 		if (rdev->ring[idx].mqd_obj) {
4737 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4738 			if (unlikely(r != 0))
4739 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4740 
4741 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4742 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4743 
4744 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4745 			rdev->ring[idx].mqd_obj = NULL;
4746 		}
4747 	}
4748 }
4749 
4750 static void cik_mec_fini(struct radeon_device *rdev)
4751 {
4752 	int r;
4753 
4754 	if (rdev->mec.hpd_eop_obj) {
4755 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4756 		if (unlikely(r != 0))
4757 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4758 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4759 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4760 
4761 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4762 		rdev->mec.hpd_eop_obj = NULL;
4763 	}
4764 }
4765 
4766 #define MEC_HPD_SIZE 2048
4767 
4768 static int cik_mec_init(struct radeon_device *rdev)
4769 {
4770 	int r;
4771 	u32 *hpd;
4772 
4773 	/*
4774 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4775 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4776 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4777 	 * be handled by KFD
4778 	 */
4779 	rdev->mec.num_mec = 1;
4780 	rdev->mec.num_pipe = 1;
4781 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
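	/* i.e. 1 MEC x 1 pipe x 8 = 8 queues owned by this driver; the rest
	 * of the hardware layout described above is left to KFD.
	 */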
4782 
4783 	if (rdev->mec.hpd_eop_obj == NULL) {
4784 		r = radeon_bo_create(rdev,
4785 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4786 				     PAGE_SIZE, true,
4787 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4788 				     &rdev->mec.hpd_eop_obj);
4789 		if (r) {
4790 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4791 			return r;
4792 		}
4793 	}
4794 
4795 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4796 	if (unlikely(r != 0)) {
4797 		cik_mec_fini(rdev);
4798 		return r;
4799 	}
4800 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4801 			  &rdev->mec.hpd_eop_gpu_addr);
4802 	if (r) {
4803 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4804 		cik_mec_fini(rdev);
4805 		return r;
4806 	}
4807 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4808 	if (r) {
4809 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4810 		cik_mec_fini(rdev);
4811 		return r;
4812 	}
4813 
4814 	/* clear memory.  Not sure if this is required or not */
4815 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4816 
4817 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4818 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4819 
4820 	return 0;
4821 }
4822 
4823 struct hqd_registers
4824 {
4825 	u32 cp_mqd_base_addr;
4826 	u32 cp_mqd_base_addr_hi;
4827 	u32 cp_hqd_active;
4828 	u32 cp_hqd_vmid;
4829 	u32 cp_hqd_persistent_state;
4830 	u32 cp_hqd_pipe_priority;
4831 	u32 cp_hqd_queue_priority;
4832 	u32 cp_hqd_quantum;
4833 	u32 cp_hqd_pq_base;
4834 	u32 cp_hqd_pq_base_hi;
4835 	u32 cp_hqd_pq_rptr;
4836 	u32 cp_hqd_pq_rptr_report_addr;
4837 	u32 cp_hqd_pq_rptr_report_addr_hi;
4838 	u32 cp_hqd_pq_wptr_poll_addr;
4839 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4840 	u32 cp_hqd_pq_doorbell_control;
4841 	u32 cp_hqd_pq_wptr;
4842 	u32 cp_hqd_pq_control;
4843 	u32 cp_hqd_ib_base_addr;
4844 	u32 cp_hqd_ib_base_addr_hi;
4845 	u32 cp_hqd_ib_rptr;
4846 	u32 cp_hqd_ib_control;
4847 	u32 cp_hqd_iq_timer;
4848 	u32 cp_hqd_iq_rptr;
4849 	u32 cp_hqd_dequeue_request;
4850 	u32 cp_hqd_dma_offload;
4851 	u32 cp_hqd_sema_cmd;
4852 	u32 cp_hqd_msg_type;
4853 	u32 cp_hqd_atomic0_preop_lo;
4854 	u32 cp_hqd_atomic0_preop_hi;
4855 	u32 cp_hqd_atomic1_preop_lo;
4856 	u32 cp_hqd_atomic1_preop_hi;
4857 	u32 cp_hqd_hq_scheduler0;
4858 	u32 cp_hqd_hq_scheduler1;
4859 	u32 cp_mqd_control;
4860 };
4861 
4862 struct bonaire_mqd
4863 {
4864 	u32 header;
4865 	u32 dispatch_initiator;
4866 	u32 dimensions[3];
4867 	u32 start_idx[3];
4868 	u32 num_threads[3];
4869 	u32 pipeline_stat_enable;
4870 	u32 perf_counter_enable;
4871 	u32 pgm[2];
4872 	u32 tba[2];
4873 	u32 tma[2];
4874 	u32 pgm_rsrc[2];
4875 	u32 vmid;
4876 	u32 resource_limits;
4877 	u32 static_thread_mgmt01[2];
4878 	u32 tmp_ring_size;
4879 	u32 static_thread_mgmt23[2];
4880 	u32 restart[3];
4881 	u32 thread_trace_enable;
4882 	u32 reserved1;
4883 	u32 user_data[16];
4884 	u32 vgtcs_invoke_count[2];
4885 	struct hqd_registers queue_state;
4886 	u32 dequeue_cntr;
4887 	u32 interrupt_queue[64];
4888 };
4889 
4890 /**
4891  * cik_cp_compute_resume - setup the compute queue registers
4892  *
4893  * @rdev: radeon_device pointer
4894  *
4895  * Program the compute queues and test them to make sure they
4896  * are working.
4897  * Returns 0 for success, error for failure.
4898  */
4899 static int cik_cp_compute_resume(struct radeon_device *rdev)
4900 {
4901 	int r, i, j, idx;
4902 	u32 tmp;
4903 	bool use_doorbell = true;
4904 	u64 hqd_gpu_addr;
4905 	u64 mqd_gpu_addr;
4906 	u64 eop_gpu_addr;
4907 	u64 wb_gpu_addr;
4908 	u32 *buf;
4909 	struct bonaire_mqd *mqd;
4910 
4911 	r = cik_cp_compute_start(rdev);
4912 	if (r)
4913 		return r;
4914 
4915 	/* fix up chicken bits */
4916 	tmp = RREG32(CP_CPF_DEBUG);
4917 	tmp |= (1 << 23);
4918 	WREG32(CP_CPF_DEBUG, tmp);
4919 
4920 	/* init the pipes */
4921 	mutex_lock(&rdev->srbm_mutex);
4922 
4923 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4924 
4925 	cik_srbm_select(rdev, 0, 0, 0, 0);
4926 
4927 	/* write the EOP addr */
4928 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4929 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4930 
4931 	/* set the VMID assigned */
4932 	WREG32(CP_HPD_EOP_VMID, 0);
4933 
4934 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
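	/* e.g. MEC_HPD_SIZE = 2048 bytes, 2048/8 = 256 and order_base_2(256) = 8,
	 * so the hw uses 2^(8+1) = 512 dwords = 2048 bytes */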
4935 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4936 	tmp &= ~EOP_SIZE_MASK;
4937 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4938 	WREG32(CP_HPD_EOP_CONTROL, tmp);
4939 
4940 	mutex_unlock(&rdev->srbm_mutex);
4941 
4942 	/* init the queues.  Just two for now. */
4943 	for (i = 0; i < 2; i++) {
4944 		if (i == 0)
4945 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4946 		else
4947 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4948 
4949 		if (rdev->ring[idx].mqd_obj == NULL) {
4950 			r = radeon_bo_create(rdev,
4951 					     sizeof(struct bonaire_mqd),
4952 					     PAGE_SIZE, true,
4953 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4954 					     NULL, &rdev->ring[idx].mqd_obj);
4955 			if (r) {
4956 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4957 				return r;
4958 			}
4959 		}
4960 
4961 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4962 		if (unlikely(r != 0)) {
4963 			cik_cp_compute_fini(rdev);
4964 			return r;
4965 		}
4966 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4967 				  &mqd_gpu_addr);
4968 		if (r) {
4969 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4970 			cik_cp_compute_fini(rdev);
4971 			return r;
4972 		}
4973 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4974 		if (r) {
4975 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4976 			cik_cp_compute_fini(rdev);
4977 			return r;
4978 		}
4979 
4980 		/* init the mqd struct */
4981 		memset(buf, 0, sizeof(struct bonaire_mqd));
4982 
4983 		mqd = (struct bonaire_mqd *)buf;
4984 		mqd->header = 0xC0310800;
4985 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4986 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4987 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4988 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4989 
4990 		mutex_lock(&rdev->srbm_mutex);
4991 		cik_srbm_select(rdev, rdev->ring[idx].me,
4992 				rdev->ring[idx].pipe,
4993 				rdev->ring[idx].queue, 0);
4994 
4995 		/* disable wptr polling */
4996 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4997 		tmp &= ~WPTR_POLL_EN;
4998 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4999 
5000 		/* enable doorbell? */
5001 		mqd->queue_state.cp_hqd_pq_doorbell_control =
5002 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5003 		if (use_doorbell)
5004 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5005 		else
5006 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
5007 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5008 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5009 
5010 		/* disable the queue if it's active */
5011 		mqd->queue_state.cp_hqd_dequeue_request = 0;
5012 		mqd->queue_state.cp_hqd_pq_rptr = 0;
5013 		mqd->queue_state.cp_hqd_pq_wptr = 0;
5014 		if (RREG32(CP_HQD_ACTIVE) & 1) {
5015 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
5016 			for (j = 0; j < rdev->usec_timeout; j++) {
5017 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
5018 					break;
5019 				udelay(1);
5020 			}
5021 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
5022 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
5023 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5024 		}
5025 
5026 		/* set the pointer to the MQD */
5027 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
5028 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5029 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
5030 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
5031 		/* set MQD vmid to 0 */
5032 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
5033 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
5034 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
5035 
5036 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
5037 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
5038 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
5039 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5040 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
5041 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
5042 
5043 		/* set up the HQD, this is similar to CP_RB0_CNTL */
5044 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
5045 		mqd->queue_state.cp_hqd_pq_control &=
5046 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
5047 
5048 		mqd->queue_state.cp_hqd_pq_control |=
5049 			order_base_2(rdev->ring[idx].ring_size / 8);
5050 		mqd->queue_state.cp_hqd_pq_control |=
5051 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
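		/* assuming the usual RADEON_GPU_PAGE_SIZE of 4096: 4096/8 = 512 and
		 * order_base_2(512) = 9, so 9 lands in the RPTR_BLOCK_SIZE field at bit 8 */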
5052 #ifdef __BIG_ENDIAN
5053 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
5054 #endif
5055 		mqd->queue_state.cp_hqd_pq_control &=
5056 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
5057 		mqd->queue_state.cp_hqd_pq_control |=
5058 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
5059 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
5060 
5061 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
5062 		if (i == 0)
5063 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5064 		else
5065 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
5066 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5067 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5068 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5069 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5070 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5071 
5072 		/* set the wb address whether it's enabled or not */
5073 		if (i == 0)
5074 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5075 		else
5076 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5077 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5078 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5079 			upper_32_bits(wb_gpu_addr) & 0xffff;
5080 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5081 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5082 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5083 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5084 
5085 		/* enable the doorbell if requested */
5086 		if (use_doorbell) {
5087 			mqd->queue_state.cp_hqd_pq_doorbell_control =
5088 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5089 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5090 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
5091 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5092 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5093 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
5094 				~(DOORBELL_SOURCE | DOORBELL_HIT);
5095 
5096 		} else {
5097 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5098 		}
5099 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5100 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5101 
5102 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5103 		rdev->ring[idx].wptr = 0;
5104 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5105 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5106 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5107 
5108 		/* set the vmid for the queue */
5109 		mqd->queue_state.cp_hqd_vmid = 0;
5110 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5111 
5112 		/* activate the queue */
5113 		mqd->queue_state.cp_hqd_active = 1;
5114 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5115 
5116 		cik_srbm_select(rdev, 0, 0, 0, 0);
5117 		mutex_unlock(&rdev->srbm_mutex);
5118 
5119 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5120 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5121 
5122 		rdev->ring[idx].ready = true;
5123 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5124 		if (r)
5125 			rdev->ring[idx].ready = false;
5126 	}
5127 
5128 	return 0;
5129 }
5130 
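/**
 * cik_cp_enable - enable/disable the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the command processors
 *
 * Enable or disable both the gfx and compute command
 * processors (CIK).
 */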
5131 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5132 {
5133 	cik_cp_gfx_enable(rdev, enable);
5134 	cik_cp_compute_enable(rdev, enable);
5135 }
5136 
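/**
 * cik_cp_load_microcode - load the gfx and compute CP ucode
 *
 * @rdev: radeon_device pointer
 *
 * Load the gfx and compute command processor microcode (CIK).
 * Returns 0 for success, error for failure.
 */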
5137 static int cik_cp_load_microcode(struct radeon_device *rdev)
5138 {
5139 	int r;
5140 
5141 	r = cik_cp_gfx_load_microcode(rdev);
5142 	if (r)
5143 		return r;
5144 	r = cik_cp_compute_load_microcode(rdev);
5145 	if (r)
5146 		return r;
5147 
5148 	return 0;
5149 }
5150 
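/**
 * cik_cp_fini - tear down the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the gfx and compute command processor rings (CIK).
 */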
5151 static void cik_cp_fini(struct radeon_device *rdev)
5152 {
5153 	cik_cp_gfx_fini(rdev);
5154 	cik_cp_compute_fini(rdev);
5155 }
5156 
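/**
 * cik_cp_resume - load ucode and resume the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 *
 * Load the microcode and resume the gfx and compute command
 * processors (CIK).
 * Returns 0 for success, error for failure.
 */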
5157 static int cik_cp_resume(struct radeon_device *rdev)
5158 {
5159 	int r;
5160 
5161 	cik_enable_gui_idle_interrupt(rdev, false);
5162 
5163 	r = cik_cp_load_microcode(rdev);
5164 	if (r)
5165 		return r;
5166 
5167 	r = cik_cp_gfx_resume(rdev);
5168 	if (r)
5169 		return r;
5170 	r = cik_cp_compute_resume(rdev);
5171 	if (r)
5172 		return r;
5173 
5174 	cik_enable_gui_idle_interrupt(rdev, true);
5175 
5176 	return 0;
5177 }
5178 
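/**
 * cik_print_gpu_status_regs - print GPU status for debugging
 *
 * @rdev: radeon_device pointer
 *
 * Dump the GRBM, SRBM, SDMA, and CP status registers to the
 * kernel log, typically ahead of a GPU reset (CIK).
 */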
5179 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5180 {
5181 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5182 		RREG32(GRBM_STATUS));
5183 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5184 		RREG32(GRBM_STATUS2));
5185 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5186 		RREG32(GRBM_STATUS_SE0));
5187 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5188 		RREG32(GRBM_STATUS_SE1));
5189 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5190 		RREG32(GRBM_STATUS_SE2));
5191 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5192 		RREG32(GRBM_STATUS_SE3));
5193 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5194 		RREG32(SRBM_STATUS));
5195 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5196 		RREG32(SRBM_STATUS2));
5197 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5198 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5199 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5200 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5201 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5202 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5203 		 RREG32(CP_STALLED_STAT1));
5204 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5205 		 RREG32(CP_STALLED_STAT2));
5206 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5207 		 RREG32(CP_STALLED_STAT3));
5208 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5209 		 RREG32(CP_CPF_BUSY_STAT));
5210 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5211 		 RREG32(CP_CPF_STALLED_STAT1));
5212 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5213 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5214 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5215 		 RREG32(CP_CPC_STALLED_STAT1));
5216 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5217 }
5218 
5219 /**
5220  * cik_gpu_check_soft_reset - check which blocks are busy
5221  *
5222  * @rdev: radeon_device pointer
5223  *
5224  * Check which blocks are busy and return the relevant reset
5225  * mask to be used by cik_gpu_soft_reset().
5226  * Returns a mask of the blocks to be reset.
5227  */
5228 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5229 {
5230 	u32 reset_mask = 0;
5231 	u32 tmp;
5232 
5233 	/* GRBM_STATUS */
5234 	tmp = RREG32(GRBM_STATUS);
5235 	if (tmp & (PA_BUSY | SC_BUSY |
5236 		   BCI_BUSY | SX_BUSY |
5237 		   TA_BUSY | VGT_BUSY |
5238 		   DB_BUSY | CB_BUSY |
5239 		   GDS_BUSY | SPI_BUSY |
5240 		   IA_BUSY | IA_BUSY_NO_DMA))
5241 		reset_mask |= RADEON_RESET_GFX;
5242 
5243 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5244 		reset_mask |= RADEON_RESET_CP;
5245 
5246 	/* GRBM_STATUS2 */
5247 	tmp = RREG32(GRBM_STATUS2);
5248 	if (tmp & RLC_BUSY)
5249 		reset_mask |= RADEON_RESET_RLC;
5250 
5251 	/* SDMA0_STATUS_REG */
5252 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5253 	if (!(tmp & SDMA_IDLE))
5254 		reset_mask |= RADEON_RESET_DMA;
5255 
5256 	/* SDMA1_STATUS_REG */
5257 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5258 	if (!(tmp & SDMA_IDLE))
5259 		reset_mask |= RADEON_RESET_DMA1;
5260 
5261 	/* SRBM_STATUS2 */
5262 	tmp = RREG32(SRBM_STATUS2);
5263 	if (tmp & SDMA_BUSY)
5264 		reset_mask |= RADEON_RESET_DMA;
5265 
5266 	if (tmp & SDMA1_BUSY)
5267 		reset_mask |= RADEON_RESET_DMA1;
5268 
5269 	/* SRBM_STATUS */
5270 	tmp = RREG32(SRBM_STATUS);
5271 
5272 	if (tmp & IH_BUSY)
5273 		reset_mask |= RADEON_RESET_IH;
5274 
5275 	if (tmp & SEM_BUSY)
5276 		reset_mask |= RADEON_RESET_SEM;
5277 
5278 	if (tmp & GRBM_RQ_PENDING)
5279 		reset_mask |= RADEON_RESET_GRBM;
5280 
5281 	if (tmp & VMC_BUSY)
5282 		reset_mask |= RADEON_RESET_VMC;
5283 
5284 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5285 		   MCC_BUSY | MCD_BUSY))
5286 		reset_mask |= RADEON_RESET_MC;
5287 
5288 	if (evergreen_is_display_hung(rdev))
5289 		reset_mask |= RADEON_RESET_DISPLAY;
5290 
5291 	/* Skip MC reset as it's most likely not hung, just busy */
5292 	if (reset_mask & RADEON_RESET_MC) {
5293 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5294 		reset_mask &= ~RADEON_RESET_MC;
5295 	}
5296 
5297 	return reset_mask;
5298 }
5299 
5300 /**
5301  * cik_gpu_soft_reset - soft reset GPU
5302  *
5303  * @rdev: radeon_device pointer
5304  * @reset_mask: mask of which blocks to reset
5305  *
5306  * Soft reset the blocks specified in @reset_mask.
5307  */
5308 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5309 {
5310 	struct evergreen_mc_save save;
5311 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5312 	u32 tmp;
5313 
5314 	if (reset_mask == 0)
5315 		return;
5316 
5317 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5318 
5319 	cik_print_gpu_status_regs(rdev);
5320 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5321 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5322 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5323 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5324 
5325 	/* disable CG/PG */
5326 	cik_fini_pg(rdev);
5327 	cik_fini_cg(rdev);
5328 
5329 	/* stop the rlc */
5330 	cik_rlc_stop(rdev);
5331 
5332 	/* Disable GFX parsing/prefetching */
5333 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5334 
5335 	/* Disable MEC parsing/prefetching */
5336 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5337 
5338 	if (reset_mask & RADEON_RESET_DMA) {
5339 		/* sdma0 */
5340 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5341 		tmp |= SDMA_HALT;
5342 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5343 	}
5344 	if (reset_mask & RADEON_RESET_DMA1) {
5345 		/* sdma1 */
5346 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5347 		tmp |= SDMA_HALT;
5348 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5349 	}
5350 
5351 	evergreen_mc_stop(rdev, &save);
5352 	if (evergreen_mc_wait_for_idle(rdev)) {
5353 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5354 	}
5355 
5356 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5357 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5358 
5359 	if (reset_mask & RADEON_RESET_CP) {
5360 		grbm_soft_reset |= SOFT_RESET_CP;
5361 
5362 		srbm_soft_reset |= SOFT_RESET_GRBM;
5363 	}
5364 
5365 	if (reset_mask & RADEON_RESET_DMA)
5366 		srbm_soft_reset |= SOFT_RESET_SDMA;
5367 
5368 	if (reset_mask & RADEON_RESET_DMA1)
5369 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5370 
5371 	if (reset_mask & RADEON_RESET_DISPLAY)
5372 		srbm_soft_reset |= SOFT_RESET_DC;
5373 
5374 	if (reset_mask & RADEON_RESET_RLC)
5375 		grbm_soft_reset |= SOFT_RESET_RLC;
5376 
5377 	if (reset_mask & RADEON_RESET_SEM)
5378 		srbm_soft_reset |= SOFT_RESET_SEM;
5379 
5380 	if (reset_mask & RADEON_RESET_IH)
5381 		srbm_soft_reset |= SOFT_RESET_IH;
5382 
5383 	if (reset_mask & RADEON_RESET_GRBM)
5384 		srbm_soft_reset |= SOFT_RESET_GRBM;
5385 
5386 	if (reset_mask & RADEON_RESET_VMC)
5387 		srbm_soft_reset |= SOFT_RESET_VMC;
5388 
5389 	if (!(rdev->flags & RADEON_IS_IGP)) {
5390 		if (reset_mask & RADEON_RESET_MC)
5391 			srbm_soft_reset |= SOFT_RESET_MC;
5392 	}
5393 
5394 	if (grbm_soft_reset) {
5395 		tmp = RREG32(GRBM_SOFT_RESET);
5396 		tmp |= grbm_soft_reset;
5397 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5398 		WREG32(GRBM_SOFT_RESET, tmp);
5399 		tmp = RREG32(GRBM_SOFT_RESET);
5400 
5401 		udelay(50);
5402 
5403 		tmp &= ~grbm_soft_reset;
5404 		WREG32(GRBM_SOFT_RESET, tmp);
5405 		tmp = RREG32(GRBM_SOFT_RESET);
5406 	}
5407 
5408 	if (srbm_soft_reset) {
5409 		tmp = RREG32(SRBM_SOFT_RESET);
5410 		tmp |= srbm_soft_reset;
5411 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5412 		WREG32(SRBM_SOFT_RESET, tmp);
5413 		tmp = RREG32(SRBM_SOFT_RESET);
5414 
5415 		udelay(50);
5416 
5417 		tmp &= ~srbm_soft_reset;
5418 		WREG32(SRBM_SOFT_RESET, tmp);
5419 		tmp = RREG32(SRBM_SOFT_RESET);
5420 	}
5421 
5422 	/* Wait a little for things to settle down */
5423 	udelay(50);
5424 
5425 	evergreen_mc_resume(rdev, &save);
5426 	udelay(50);
5427 
5428 	cik_print_gpu_status_regs(rdev);
5429 }
5430 
5431 struct kv_reset_save_regs {
5432 	u32 gmcon_reng_execute;
5433 	u32 gmcon_misc;
5434 	u32 gmcon_misc3;
5435 };
5436 
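/**
 * kv_save_regs_for_reset - save GMCON registers before reset
 *
 * @rdev: radeon_device pointer
 * @save: kv_reset_save_regs structure to save into
 *
 * Save the GMCON registers that a PCI config reset clobbers on
 * APUs and disable register-engine execution on power up and
 * register update (KV).
 */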
5437 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5438 				   struct kv_reset_save_regs *save)
5439 {
5440 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5441 	save->gmcon_misc = RREG32(GMCON_MISC);
5442 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5443 
5444 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5445 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5446 						STCTRL_STUTTER_EN));
5447 }
5448 
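/**
 * kv_restore_regs_for_reset - restore GMCON state after reset
 *
 * @rdev: radeon_device pointer
 * @save: previously saved kv_reset_save_regs structure
 *
 * Replay the GMCON PGFSM programming sequence and then restore
 * the saved GMCON registers (KV).
 */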
5449 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5450 				      struct kv_reset_save_regs *save)
5451 {
5452 	int i;
5453 
5454 	WREG32(GMCON_PGFSM_WRITE, 0);
5455 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5456 
5457 	for (i = 0; i < 5; i++)
5458 		WREG32(GMCON_PGFSM_WRITE, 0);
5459 
5460 	WREG32(GMCON_PGFSM_WRITE, 0);
5461 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5462 
5463 	for (i = 0; i < 5; i++)
5464 		WREG32(GMCON_PGFSM_WRITE, 0);
5465 
5466 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5467 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5468 
5469 	for (i = 0; i < 5; i++)
5470 		WREG32(GMCON_PGFSM_WRITE, 0);
5471 
5472 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5473 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5474 
5475 	for (i = 0; i < 5; i++)
5476 		WREG32(GMCON_PGFSM_WRITE, 0);
5477 
5478 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5479 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5480 
5481 	for (i = 0; i < 5; i++)
5482 		WREG32(GMCON_PGFSM_WRITE, 0);
5483 
5484 	WREG32(GMCON_PGFSM_WRITE, 0);
5485 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5486 
5487 	for (i = 0; i < 5; i++)
5488 		WREG32(GMCON_PGFSM_WRITE, 0);
5489 
5490 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5491 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5492 
5493 	for (i = 0; i < 5; i++)
5494 		WREG32(GMCON_PGFSM_WRITE, 0);
5495 
5496 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5497 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5498 
5499 	for (i = 0; i < 5; i++)
5500 		WREG32(GMCON_PGFSM_WRITE, 0);
5501 
5502 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5503 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5504 
5505 	for (i = 0; i < 5; i++)
5506 		WREG32(GMCON_PGFSM_WRITE, 0);
5507 
5508 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5509 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5510 
5511 	for (i = 0; i < 5; i++)
5512 		WREG32(GMCON_PGFSM_WRITE, 0);
5513 
5514 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5515 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5516 
5517 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5518 	WREG32(GMCON_MISC, save->gmcon_misc);
5519 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5520 }
5521 
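/**
 * cik_gpu_pci_config_reset - reset the GPU via PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Halt the engines, stop memory access, and reset the whole
 * ASIC through the PCI config space reset mechanism; used as a
 * fallback when a soft reset does not bring the GPU back (CIK).
 */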
5522 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5523 {
5524 	struct evergreen_mc_save save;
5525 	struct kv_reset_save_regs kv_save = { 0 };
5526 	u32 tmp, i;
5527 
5528 	dev_info(rdev->dev, "GPU pci config reset\n");
5529 
5530 	/* disable dpm? */
5531 
5532 	/* disable cg/pg */
5533 	cik_fini_pg(rdev);
5534 	cik_fini_cg(rdev);
5535 
5536 	/* Disable GFX parsing/prefetching */
5537 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5538 
5539 	/* Disable MEC parsing/prefetching */
5540 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5541 
5542 	/* sdma0 */
5543 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5544 	tmp |= SDMA_HALT;
5545 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5546 	/* sdma1 */
5547 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5548 	tmp |= SDMA_HALT;
5549 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5550 	/* XXX other engines? */
5551 
5552 	/* halt the rlc, disable cp internal ints */
5553 	cik_rlc_stop(rdev);
5554 
5555 	udelay(50);
5556 
5557 	/* disable mem access */
5558 	evergreen_mc_stop(rdev, &save);
5559 	if (evergreen_mc_wait_for_idle(rdev)) {
5560 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5561 	}
5562 
5563 	if (rdev->flags & RADEON_IS_IGP)
5564 		kv_save_regs_for_reset(rdev, &kv_save);
5565 
5566 	/* disable BM */
5567 	pci_clear_master(rdev->pdev);
5568 	/* reset */
5569 	radeon_pci_config_reset(rdev);
5570 
5571 	udelay(100);
5572 
5573 	/* wait for asic to come out of reset */
5574 	for (i = 0; i < rdev->usec_timeout; i++) {
5575 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5576 			break;
5577 		udelay(1);
5578 	}
5579 
5580 	/* does asic init need to be run first??? */
5581 	if (rdev->flags & RADEON_IS_IGP)
5582 		kv_restore_regs_for_reset(rdev, &kv_save);
5583 }
5584 
5585 /**
5586  * cik_asic_reset - soft reset GPU
5587  *
5588  * @rdev: radeon_device pointer
5589  *
5590  * Look up which blocks are hung and attempt
5591  * to reset them.
5592  * Returns 0 for success.
5593  */
5594 int cik_asic_reset(struct radeon_device *rdev)
5595 {
5596 	u32 reset_mask;
5597 
5598 	reset_mask = cik_gpu_check_soft_reset(rdev);
5599 
5600 	if (reset_mask)
5601 		r600_set_bios_scratch_engine_hung(rdev, true);
5602 
5603 	/* try soft reset */
5604 	cik_gpu_soft_reset(rdev, reset_mask);
5605 
5606 	reset_mask = cik_gpu_check_soft_reset(rdev);
5607 
5608 	/* try pci config reset */
5609 	if (reset_mask && radeon_hard_reset)
5610 		cik_gpu_pci_config_reset(rdev);
5611 
5612 	reset_mask = cik_gpu_check_soft_reset(rdev);
5613 
5614 	if (!reset_mask)
5615 		r600_set_bios_scratch_engine_hung(rdev, false);
5616 
5617 	return 0;
5618 }
5619 
5620 /**
5621  * cik_gfx_is_lockup - check if the 3D engine is locked up
5622  *
5623  * @rdev: radeon_device pointer
5624  * @ring: radeon_ring structure holding ring information
5625  *
5626  * Check if the 3D engine is locked up (CIK).
5627  * Returns true if the engine is locked, false if not.
5628  */
5629 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5630 {
5631 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5632 
5633 	if (!(reset_mask & (RADEON_RESET_GFX |
5634 			    RADEON_RESET_COMPUTE |
5635 			    RADEON_RESET_CP))) {
5636 		radeon_ring_lockup_update(rdev, ring);
5637 		return false;
5638 	}
5639 	return radeon_ring_test_lockup(rdev, ring);
5640 }
5641 
5642 /* MC */
5643 /**
5644  * cik_mc_program - program the GPU memory controller
5645  *
5646  * @rdev: radeon_device pointer
5647  *
5648  * Set the location of vram, gart, and AGP in the GPU's
5649  * physical address space (CIK).
5650  */
5651 static void cik_mc_program(struct radeon_device *rdev)
5652 {
5653 	struct evergreen_mc_save save;
5654 	u32 tmp;
5655 	int i, j;
5656 
5657 	/* Initialize HDP */
5658 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5659 		WREG32((0x2c14 + j), 0x00000000);
5660 		WREG32((0x2c18 + j), 0x00000000);
5661 		WREG32((0x2c1c + j), 0x00000000);
5662 		WREG32((0x2c20 + j), 0x00000000);
5663 		WREG32((0x2c24 + j), 0x00000000);
5664 	}
5665 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5666 
5667 	evergreen_mc_stop(rdev, &save);
5668 	if (radeon_mc_wait_for_idle(rdev)) {
5669 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5670 	}
5671 	/* Lock out access through VGA aperture */
5672 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5673 	/* Update configuration */
5674 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5675 	       rdev->mc.vram_start >> 12);
5676 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5677 	       rdev->mc.vram_end >> 12);
5678 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5679 	       rdev->vram_scratch.gpu_addr >> 12);
5680 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5681 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5682 	WREG32(MC_VM_FB_LOCATION, tmp);
5683 	/* XXX double check these! */
5684 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5685 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5686 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5687 	WREG32(MC_VM_AGP_BASE, 0);
5688 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5689 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5690 	if (radeon_mc_wait_for_idle(rdev)) {
5691 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5692 	}
5693 	evergreen_mc_resume(rdev, &save);
5694 	/* we need to own VRAM, so turn off the VGA renderer here
5695 	 * to stop it overwriting our objects */
5696 	rv515_vga_render_disable(rdev);
5697 }
5698 
5699 /**
5700  * cik_mc_init - initialize the memory controller driver params
5701  *
5702  * @rdev: radeon_device pointer
5703  *
5704  * Look up the amount of vram, vram width, and decide how to place
5705  * vram and gart within the GPU's physical address space (CIK).
5706  * Returns 0 for success.
5707  */
5708 static int cik_mc_init(struct radeon_device *rdev)
5709 {
5710 	u32 tmp;
5711 	int chansize, numchan;
5712 
5713 	/* Get VRAM information */
5714 	rdev->mc.vram_is_ddr = true;
5715 	tmp = RREG32(MC_ARB_RAMCFG);
5716 	if (tmp & CHANSIZE_MASK) {
5717 		chansize = 64;
5718 	} else {
5719 		chansize = 32;
5720 	}
5721 	tmp = RREG32(MC_SHARED_CHMAP);
5722 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5723 	case 0:
5724 	default:
5725 		numchan = 1;
5726 		break;
5727 	case 1:
5728 		numchan = 2;
5729 		break;
5730 	case 2:
5731 		numchan = 4;
5732 		break;
5733 	case 3:
5734 		numchan = 8;
5735 		break;
5736 	case 4:
5737 		numchan = 3;
5738 		break;
5739 	case 5:
5740 		numchan = 6;
5741 		break;
5742 	case 6:
5743 		numchan = 10;
5744 		break;
5745 	case 7:
5746 		numchan = 12;
5747 		break;
5748 	case 8:
5749 		numchan = 16;
5750 		break;
5751 	}
5752 	rdev->mc.vram_width = numchan * chansize;
5753 	/* Could aperture size report 0? */
5754 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5755 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5756 	/* CONFIG_MEMSIZE is in MB, as on SI */
5757 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5758 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5759 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5760 	si_vram_gtt_location(rdev, &rdev->mc);
5761 	radeon_update_bandwidth_info(rdev);
5762 
5763 	return 0;
5764 }
5765 
5766 /*
5767  * GART
5768  * VMID 0 is the physical GPU addresses as used by the kernel.
5769  * VMIDs 1-15 are used for userspace clients and are handled
5770  * by the radeon vm/hsa code.
5771  */
5772 /**
5773  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5774  *
5775  * @rdev: radeon_device pointer
5776  *
5777  * Flush the TLB for the VMID 0 page table (CIK).
5778  */
5779 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5780 {
5781 	/* flush hdp cache */
5782 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5783 
5784 	/* bits 0-15 are the VM contexts0-15 */
5785 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5786 }
5787 
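/**
 * cik_pcie_init_compute_vmid - set up the compute VMIDs
 *
 * @rdev: radeon_device pointer
 *
 * Program the SH_MEM aperture registers for VMIDs 8-15, which
 * are reserved for amdkfd compute clients (CIK).
 */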
5788 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5789 {
5790 	int i;
5791 	uint32_t sh_mem_bases, sh_mem_config;
5792 
5793 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5794 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5795 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5796 
5797 	mutex_lock(&rdev->srbm_mutex);
5798 	for (i = 8; i < 16; i++) {
5799 		cik_srbm_select(rdev, 0, 0, 0, i);
5800 		/* CP and shaders */
5801 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5802 		WREG32(SH_MEM_APE1_BASE, 1);
5803 		WREG32(SH_MEM_APE1_LIMIT, 0);
5804 		WREG32(SH_MEM_BASES, sh_mem_bases);
5805 	}
5806 	cik_srbm_select(rdev, 0, 0, 0, 0);
5807 	mutex_unlock(&rdev->srbm_mutex);
5808 }
5809 
5810 /**
5811  * cik_pcie_gart_enable - gart enable
5812  *
5813  * @rdev: radeon_device pointer
5814  *
5815  * This sets up the TLBs, programs the page tables for VMID0,
5816  * sets up the hw for VMIDs 1-15 which are allocated on
5817  * demand, and sets up the global locations for the LDS, GDS,
5818  * and GPUVM for FSA64 clients (CIK).
5819  * Returns 0 for success, errors for failure.
5820  */
5821 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5822 {
5823 	int r, i;
5824 
5825 	if (rdev->gart.robj == NULL) {
5826 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5827 		return -EINVAL;
5828 	}
5829 	r = radeon_gart_table_vram_pin(rdev);
5830 	if (r)
5831 		return r;
5832 	/* Setup TLB control */
5833 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5834 	       (0xA << 7) |
5835 	       ENABLE_L1_TLB |
5836 	       ENABLE_L1_FRAGMENT_PROCESSING |
5837 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5838 	       ENABLE_ADVANCED_DRIVER_MODEL |
5839 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5840 	/* Setup L2 cache */
5841 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5842 	       ENABLE_L2_FRAGMENT_PROCESSING |
5843 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5844 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5845 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5846 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5847 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5848 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5849 	       BANK_SELECT(4) |
5850 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5851 	/* setup context0 */
5852 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5853 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5854 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5855 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5856 			(u32)(rdev->dummy_page.addr >> 12));
5857 	WREG32(VM_CONTEXT0_CNTL2, 0);
5858 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5859 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5860 
5861 	WREG32(0x15D4, 0);
5862 	WREG32(0x15D8, 0);
5863 	WREG32(0x15DC, 0);
5864 
5865 	/* restore context1-15 */
5866 	/* set vm size, must be a multiple of 4 */
5867 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5868 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5869 	for (i = 1; i < 16; i++) {
5870 		if (i < 8)
5871 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5872 			       rdev->vm_manager.saved_table_addr[i]);
5873 		else
5874 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5875 			       rdev->vm_manager.saved_table_addr[i]);
5876 	}
5877 
5878 	/* enable context1-15 */
5879 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5880 	       (u32)(rdev->dummy_page.addr >> 12));
5881 	WREG32(VM_CONTEXT1_CNTL2, 4);
5882 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5883 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5884 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5885 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5886 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5887 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5888 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5889 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5890 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5891 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5892 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5893 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5894 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5895 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5896 
5897 	if (rdev->family == CHIP_KAVERI) {
5898 		u32 tmp = RREG32(CHUB_CONTROL);
5899 		tmp &= ~BYPASS_VM;
5900 		WREG32(CHUB_CONTROL, tmp);
5901 	}
5902 
5903 	/* XXX SH_MEM regs */
5904 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5905 	mutex_lock(&rdev->srbm_mutex);
5906 	for (i = 0; i < 16; i++) {
5907 		cik_srbm_select(rdev, 0, 0, 0, i);
5908 		/* CP and shaders */
5909 		WREG32(SH_MEM_CONFIG, 0);
5910 		WREG32(SH_MEM_APE1_BASE, 1);
5911 		WREG32(SH_MEM_APE1_LIMIT, 0);
5912 		WREG32(SH_MEM_BASES, 0);
5913 		/* SDMA GFX */
5914 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5915 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5916 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5917 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5918 		/* XXX SDMA RLC - todo */
5919 	}
5920 	cik_srbm_select(rdev, 0, 0, 0, 0);
5921 	mutex_unlock(&rdev->srbm_mutex);
5922 
5923 	cik_pcie_init_compute_vmid(rdev);
5924 
5925 	cik_pcie_gart_tlb_flush(rdev);
5926 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5927 		 (unsigned)(rdev->mc.gtt_size >> 20),
5928 		 (unsigned long long)rdev->gart.table_addr);
5929 	rdev->gart.ready = true;
5930 	return 0;
5931 }
5932 
5933 /**
5934  * cik_pcie_gart_disable - gart disable
5935  *
5936  * @rdev: radeon_device pointer
5937  *
5938  * This disables all VM page tables (CIK).
5939  */
5940 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5941 {
5942 	unsigned i;
5943 
5944 	for (i = 1; i < 16; ++i) {
5945 		uint32_t reg;
5946 		if (i < 8)
5947 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5948 		else
5949 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5950 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5951 	}
5952 
5953 	/* Disable all tables */
5954 	WREG32(VM_CONTEXT0_CNTL, 0);
5955 	WREG32(VM_CONTEXT1_CNTL, 0);
5956 	/* Setup TLB control */
5957 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5958 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5959 	/* Setup L2 cache */
5960 	WREG32(VM_L2_CNTL,
5961 	       ENABLE_L2_FRAGMENT_PROCESSING |
5962 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5963 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5964 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5965 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5966 	WREG32(VM_L2_CNTL2, 0);
5967 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5968 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5969 	radeon_gart_table_vram_unpin(rdev);
5970 }
5971 
5972 /**
5973  * cik_pcie_gart_fini - vm fini callback
5974  *
5975  * @rdev: radeon_device pointer
5976  *
5977  * Tears down the driver GART/VM setup (CIK).
5978  */
5979 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5980 {
5981 	cik_pcie_gart_disable(rdev);
5982 	radeon_gart_table_vram_free(rdev);
5983 	radeon_gart_fini(rdev);
5984 }
5985 
5986 /* vm parser */
5987 /**
5988  * cik_ib_parse - vm ib_parse callback
5989  *
5990  * @rdev: radeon_device pointer
5991  * @ib: indirect buffer pointer
5992  *
5993  * CIK uses hw IB checking so this is a nop (CIK).
5994  */
5995 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5996 {
5997 	return 0;
5998 }
5999 
6000 /*
6001  * vm
6002  * VMID 0 is the physical GPU addresses as used by the kernel.
6003  * VMIDs 1-15 are used for userspace clients and are handled
6004  * by the radeon vm/hsa code.
6005  */
6006 /**
6007  * cik_vm_init - cik vm init callback
6008  *
6009  * @rdev: radeon_device pointer
6010  *
6011  * Inits CIK-specific VM parameters (number of VMs, base of vram for
6012  * VMIDs 1-15) (CIK).
6013  * Returns 0 for success.
6014  */
6015 int cik_vm_init(struct radeon_device *rdev)
6016 {
6017 	/*
6018 	 * number of VMs
6019 	 * VMID 0 is reserved for System
6020 	 * radeon graphics/compute will use VMIDs 1-7
6021 	 * amdkfd will use VMIDs 8-15
6022 	 */
6023 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6024 	/* base offset of vram pages */
6025 	if (rdev->flags & RADEON_IS_IGP) {
6026 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
6027 		tmp <<= 22;
6028 		rdev->vm_manager.vram_base_offset = tmp;
6029 	} else
6030 		rdev->vm_manager.vram_base_offset = 0;
6031 
6032 	return 0;
6033 }
6034 
6035 /**
6036  * cik_vm_fini - cik vm fini callback
6037  *
6038  * @rdev: radeon_device pointer
6039  *
6040  * Tear down any asic specific VM setup (CIK).
6041  */
6042 void cik_vm_fini(struct radeon_device *rdev)
6043 {
6044 }
6045 
6046 /**
6047  * cik_vm_decode_fault - print human readable fault info
6048  *
6049  * @rdev: radeon_device pointer
6050  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6051  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
6052  *
6053  * Print human readable fault information (CIK).
6054  */
6055 static void cik_vm_decode_fault(struct radeon_device *rdev,
6056 				u32 status, u32 addr, u32 mc_client)
6057 {
6058 	u32 mc_id;
6059 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6060 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6061 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6062 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6063 
6064 	if (rdev->family == CHIP_HAWAII)
6065 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6066 	else
6067 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6068 
6069 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6070 	       protections, vmid, addr,
6071 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6072 	       block, mc_client, mc_id);
6073 }
6074 
6075 /**
6076  * cik_vm_flush - cik vm flush using the CP
6077  *
6078  * @rdev: radeon_device pointer
6079  *
6080  * Update the page table base and flush the VM TLB
6081  * using the CP (CIK).
6082  */
6083 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6084 		  unsigned vm_id, uint64_t pd_addr)
6085 {
6086 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6087 
6088 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6089 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6090 				 WRITE_DATA_DST_SEL(0)));
6091 	if (vm_id < 8) {
6092 		radeon_ring_write(ring,
6093 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6094 	} else {
6095 		radeon_ring_write(ring,
6096 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6097 	}
6098 	radeon_ring_write(ring, 0);
6099 	radeon_ring_write(ring, pd_addr >> 12);
6100 
6101 	/* update SH_MEM_* regs */
6102 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6103 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6104 				 WRITE_DATA_DST_SEL(0)));
6105 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6106 	radeon_ring_write(ring, 0);
6107 	radeon_ring_write(ring, VMID(vm_id));
6108 
6109 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6110 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6111 				 WRITE_DATA_DST_SEL(0)));
6112 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6113 	radeon_ring_write(ring, 0);
6114 
6115 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6116 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6117 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6118 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6119 
6120 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6121 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6122 				 WRITE_DATA_DST_SEL(0)));
6123 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6124 	radeon_ring_write(ring, 0);
6125 	radeon_ring_write(ring, VMID(0));
6126 
6127 	/* HDP flush */
6128 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6129 
6130 	/* bits 0-15 are the VM contexts0-15 */
6131 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6132 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6133 				 WRITE_DATA_DST_SEL(0)));
6134 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6135 	radeon_ring_write(ring, 0);
6136 	radeon_ring_write(ring, 1 << vm_id);
6137 
6138 	/* wait for the invalidate to complete */
6139 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6140 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6141 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6142 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6143 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6144 	radeon_ring_write(ring, 0);
6145 	radeon_ring_write(ring, 0); /* ref */
6146 	radeon_ring_write(ring, 0); /* mask */
6147 	radeon_ring_write(ring, 0x20); /* poll interval */
6148 
6149 	/* compute doesn't have PFP */
6150 	if (usepfp) {
6151 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6152 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6153 		radeon_ring_write(ring, 0x0);
6154 	}
6155 }
6156 
6157 /*
6158  * RLC
6159  * The RLC is a multi-purpose microengine that handles a
6160  * variety of functions, the most important of which is
6161  * the interrupt controller.
6162  */
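/**
 * cik_enable_gui_idle_interrupt - enable/disable GUI idle interrupts
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the interrupts
 *
 * Enable or disable the context busy/empty interrupts on the
 * gfx ring (CIK).
 */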
6163 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6164 					  bool enable)
6165 {
6166 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6167 
6168 	if (enable)
6169 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6170 	else
6171 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6172 	WREG32(CP_INT_CNTL_RING0, tmp);
6173 }
6174 
6175 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6176 {
6177 	u32 tmp;
6178 
6179 	tmp = RREG32(RLC_LB_CNTL);
6180 	if (enable)
6181 		tmp |= LOAD_BALANCE_ENABLE;
6182 	else
6183 		tmp &= ~LOAD_BALANCE_ENABLE;
6184 	WREG32(RLC_LB_CNTL, tmp);
6185 }
6186 
6187 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6188 {
6189 	u32 i, j, k;
6190 	u32 mask;
6191 
6192 	mutex_lock(&rdev->grbm_idx_mutex);
6193 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6194 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6195 			cik_select_se_sh(rdev, i, j);
6196 			for (k = 0; k < rdev->usec_timeout; k++) {
6197 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6198 					break;
6199 				udelay(1);
6200 			}
6201 		}
6202 	}
6203 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6204 	mutex_unlock(&rdev->grbm_idx_mutex);
6205 
6206 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6207 	for (k = 0; k < rdev->usec_timeout; k++) {
6208 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6209 			break;
6210 		udelay(1);
6211 	}
6212 }
6213 
6214 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6215 {
6216 	u32 tmp;
6217 
6218 	tmp = RREG32(RLC_CNTL);
6219 	if (tmp != rlc)
6220 		WREG32(RLC_CNTL, rlc);
6221 }
6222 
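/**
 * cik_halt_rlc - disable the RLC and wait for it to idle
 *
 * @rdev: radeon_device pointer
 *
 * Disable the RLC if it is running and wait for the GPM and
 * serdes masters to go idle (CIK).
 * Returns the previous RLC_CNTL value so the caller can
 * restore it with cik_update_rlc().
 */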
6223 static u32 cik_halt_rlc(struct radeon_device *rdev)
6224 {
6225 	u32 data, orig;
6226 
6227 	orig = data = RREG32(RLC_CNTL);
6228 
6229 	if (data & RLC_ENABLE) {
6230 		u32 i;
6231 
6232 		data &= ~RLC_ENABLE;
6233 		WREG32(RLC_CNTL, data);
6234 
6235 		for (i = 0; i < rdev->usec_timeout; i++) {
6236 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6237 				break;
6238 			udelay(1);
6239 		}
6240 
6241 		cik_wait_for_rlc_serdes(rdev);
6242 	}
6243 
6244 	return orig;
6245 }
6246 
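/**
 * cik_enter_rlc_safe_mode - put the RLC into safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Request RLC safe mode via RLC_GPR_REG2, then wait for the
 * GFX power/clock status bits and for the request to be
 * acknowledged (CIK).
 */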
6247 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6248 {
6249 	u32 tmp, i, mask;
6250 
6251 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6252 	WREG32(RLC_GPR_REG2, tmp);
6253 
6254 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6255 	for (i = 0; i < rdev->usec_timeout; i++) {
6256 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6257 			break;
6258 		udelay(1);
6259 	}
6260 
6261 	for (i = 0; i < rdev->usec_timeout; i++) {
6262 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6263 			break;
6264 		udelay(1);
6265 	}
6266 }
6267 
6268 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6269 {
6270 	u32 tmp;
6271 
6272 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6273 	WREG32(RLC_GPR_REG2, tmp);
6274 }
6275 
6276 /**
6277  * cik_rlc_stop - stop the RLC ME
6278  *
6279  * @rdev: radeon_device pointer
6280  *
6281  * Halt the RLC ME (MicroEngine) (CIK).
6282  */
6283 static void cik_rlc_stop(struct radeon_device *rdev)
6284 {
6285 	WREG32(RLC_CNTL, 0);
6286 
6287 	cik_enable_gui_idle_interrupt(rdev, false);
6288 
6289 	cik_wait_for_rlc_serdes(rdev);
6290 }
6291 
6292 /**
6293  * cik_rlc_start - start the RLC ME
6294  *
6295  * @rdev: radeon_device pointer
6296  *
6297  * Unhalt the RLC ME (MicroEngine) (CIK).
6298  */
6299 static void cik_rlc_start(struct radeon_device *rdev)
6300 {
6301 	WREG32(RLC_CNTL, RLC_ENABLE);
6302 
6303 	cik_enable_gui_idle_interrupt(rdev, true);
6304 
6305 	udelay(50);
6306 }
6307 
6308 /**
6309  * cik_rlc_resume - setup the RLC hw
6310  *
6311  * @rdev: radeon_device pointer
6312  *
6313  * Initialize the RLC registers, load the ucode,
6314  * and start the RLC (CIK).
6315  * Returns 0 for success, -EINVAL if the ucode is not available.
6316  */
6317 static int cik_rlc_resume(struct radeon_device *rdev)
6318 {
6319 	u32 i, size, tmp;
6320 
6321 	if (!rdev->rlc_fw)
6322 		return -EINVAL;
6323 
6324 	cik_rlc_stop(rdev);
6325 
6326 	/* disable CG */
6327 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6328 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6329 
6330 	si_rlc_reset(rdev);
6331 
6332 	cik_init_pg(rdev);
6333 
6334 	cik_init_cg(rdev);
6335 
6336 	WREG32(RLC_LB_CNTR_INIT, 0);
6337 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6338 
6339 	mutex_lock(&rdev->grbm_idx_mutex);
6340 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6341 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6342 	WREG32(RLC_LB_PARAMS, 0x00600408);
6343 	WREG32(RLC_LB_CNTL, 0x80000004);
6344 	mutex_unlock(&rdev->grbm_idx_mutex);
6345 
6346 	WREG32(RLC_MC_CNTL, 0);
6347 	WREG32(RLC_UCODE_CNTL, 0);
6348 
6349 	if (rdev->new_fw) {
6350 		const struct rlc_firmware_header_v1_0 *hdr =
6351 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6352 		const __le32 *fw_data = (const __le32 *)
6353 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6354 
6355 		radeon_ucode_print_rlc_hdr(&hdr->header);
6356 
6357 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6358 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6359 		for (i = 0; i < size; i++)
6360 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6361 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6362 	} else {
6363 		const __be32 *fw_data;
6364 
6365 		switch (rdev->family) {
6366 		case CHIP_BONAIRE:
6367 		case CHIP_HAWAII:
6368 		default:
6369 			size = BONAIRE_RLC_UCODE_SIZE;
6370 			break;
6371 		case CHIP_KAVERI:
6372 			size = KV_RLC_UCODE_SIZE;
6373 			break;
6374 		case CHIP_KABINI:
6375 			size = KB_RLC_UCODE_SIZE;
6376 			break;
6377 		case CHIP_MULLINS:
6378 			size = ML_RLC_UCODE_SIZE;
6379 			break;
6380 		}
6381 
6382 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6383 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6384 		for (i = 0; i < size; i++)
6385 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6386 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6387 	}
6388 
6389 	/* XXX - find out what chips support lbpw */
6390 	cik_enable_lbpw(rdev, false);
6391 
6392 	if (rdev->family == CHIP_BONAIRE)
6393 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6394 
6395 	cik_rlc_start(rdev);
6396 
6397 	return 0;
6398 }
6399 
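/**
 * cik_enable_cgcg - enable/disable coarse grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable CGCG
 *
 * Enable or disable coarse grain clock gating (CGCG) and
 * coarse grain light sleep (CGLS) for the gfx block (CIK).
 */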
6400 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6401 {
6402 	u32 data, orig, tmp, tmp2;
6403 
6404 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6405 
6406 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6407 		cik_enable_gui_idle_interrupt(rdev, true);
6408 
6409 		tmp = cik_halt_rlc(rdev);
6410 
6411 		mutex_lock(&rdev->grbm_idx_mutex);
6412 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6413 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6414 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6415 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6416 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6417 		mutex_unlock(&rdev->grbm_idx_mutex);
6418 
6419 		cik_update_rlc(rdev, tmp);
6420 
6421 		data |= CGCG_EN | CGLS_EN;
6422 	} else {
6423 		cik_enable_gui_idle_interrupt(rdev, false);
6424 
6425 		RREG32(CB_CGTT_SCLK_CTRL);
6426 		RREG32(CB_CGTT_SCLK_CTRL);
6427 		RREG32(CB_CGTT_SCLK_CTRL);
6428 		RREG32(CB_CGTT_SCLK_CTRL);
6429 
6430 		data &= ~(CGCG_EN | CGLS_EN);
6431 	}
6432 
6433 	if (orig != data)
6434 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6435 
6436 }
6437 
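/**
 * cik_enable_mgcg - enable/disable medium grain clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable MGCG
 *
 * Enable or disable medium grain clock gating (MGCG) and the
 * related memory light sleep states for the gfx block (CIK).
 */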
6438 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6439 {
6440 	u32 data, orig, tmp = 0;
6441 
6442 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6443 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6444 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6445 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6446 				data |= CP_MEM_LS_EN;
6447 				if (orig != data)
6448 					WREG32(CP_MEM_SLP_CNTL, data);
6449 			}
6450 		}
6451 
6452 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6453 		data |= 0x00000001;
6454 		data &= 0xfffffffd;
6455 		if (orig != data)
6456 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6457 
6458 		tmp = cik_halt_rlc(rdev);
6459 
6460 		mutex_lock(&rdev->grbm_idx_mutex);
6461 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6462 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6463 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6464 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6465 		WREG32(RLC_SERDES_WR_CTRL, data);
6466 		mutex_unlock(&rdev->grbm_idx_mutex);
6467 
6468 		cik_update_rlc(rdev, tmp);
6469 
6470 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6471 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6472 			data &= ~SM_MODE_MASK;
6473 			data |= SM_MODE(0x2);
6474 			data |= SM_MODE_ENABLE;
6475 			data &= ~CGTS_OVERRIDE;
6476 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6477 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6478 				data &= ~CGTS_LS_OVERRIDE;
6479 			data &= ~ON_MONITOR_ADD_MASK;
6480 			data |= ON_MONITOR_ADD_EN;
6481 			data |= ON_MONITOR_ADD(0x96);
6482 			if (orig != data)
6483 				WREG32(CGTS_SM_CTRL_REG, data);
6484 		}
6485 	} else {
6486 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6487 		data |= 0x00000003;
6488 		if (orig != data)
6489 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6490 
6491 		data = RREG32(RLC_MEM_SLP_CNTL);
6492 		if (data & RLC_MEM_LS_EN) {
6493 			data &= ~RLC_MEM_LS_EN;
6494 			WREG32(RLC_MEM_SLP_CNTL, data);
6495 		}
6496 
6497 		data = RREG32(CP_MEM_SLP_CNTL);
6498 		if (data & CP_MEM_LS_EN) {
6499 			data &= ~CP_MEM_LS_EN;
6500 			WREG32(CP_MEM_SLP_CNTL, data);
6501 		}
6502 
6503 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6504 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6505 		if (orig != data)
6506 			WREG32(CGTS_SM_CTRL_REG, data);
6507 
6508 		tmp = cik_halt_rlc(rdev);
6509 
6510 		mutex_lock(&rdev->grbm_idx_mutex);
6511 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6512 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6513 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6514 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6515 		WREG32(RLC_SERDES_WR_CTRL, data);
6516 		mutex_unlock(&rdev->grbm_idx_mutex);
6517 
6518 		cik_update_rlc(rdev, tmp);
6519 	}
6520 }
6521 
6522 static const u32 mc_cg_registers[] =
6523 {
6524 	MC_HUB_MISC_HUB_CG,
6525 	MC_HUB_MISC_SIP_CG,
6526 	MC_HUB_MISC_VM_CG,
6527 	MC_XPB_CLK_GAT,
6528 	ATC_MISC_CG,
6529 	MC_CITF_MISC_WR_CG,
6530 	MC_CITF_MISC_RD_CG,
6531 	MC_CITF_MISC_VM_CG,
6532 	VM_L2_CG,
6533 };
6534 
6535 static void cik_enable_mc_ls(struct radeon_device *rdev,
6536 			     bool enable)
6537 {
6538 	int i;
6539 	u32 orig, data;
6540 
6541 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6542 		orig = data = RREG32(mc_cg_registers[i]);
6543 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6544 			data |= MC_LS_ENABLE;
6545 		else
6546 			data &= ~MC_LS_ENABLE;
6547 		if (data != orig)
6548 			WREG32(mc_cg_registers[i], data);
6549 	}
6550 }
6551 
6552 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6553 			       bool enable)
6554 {
6555 	int i;
6556 	u32 orig, data;
6557 
6558 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6559 		orig = data = RREG32(mc_cg_registers[i]);
6560 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6561 			data |= MC_CG_ENABLE;
6562 		else
6563 			data &= ~MC_CG_ENABLE;
6564 		if (data != orig)
6565 			WREG32(mc_cg_registers[i], data);
6566 	}
6567 }
6568 
6569 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6570 				 bool enable)
6571 {
6572 	u32 orig, data;
6573 
6574 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6575 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6576 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6577 	} else {
6578 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6579 		data |= 0xff000000;
6580 		if (data != orig)
6581 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6582 
6583 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6584 		data |= 0xff000000;
6585 		if (data != orig)
6586 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6587 	}
6588 }
6589 
6590 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6591 				 bool enable)
6592 {
6593 	u32 orig, data;
6594 
6595 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6596 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6597 		data |= 0x100;
6598 		if (orig != data)
6599 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6600 
6601 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6602 		data |= 0x100;
6603 		if (orig != data)
6604 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6605 	} else {
6606 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6607 		data &= ~0x100;
6608 		if (orig != data)
6609 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6610 
6611 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6612 		data &= ~0x100;
6613 		if (orig != data)
6614 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6615 	}
6616 }
6617 
6618 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6619 				bool enable)
6620 {
6621 	u32 orig, data;
6622 
6623 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6624 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6625 		data = 0xfff;
6626 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6627 
6628 		orig = data = RREG32(UVD_CGC_CTRL);
6629 		data |= DCM;
6630 		if (orig != data)
6631 			WREG32(UVD_CGC_CTRL, data);
6632 	} else {
6633 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6634 		data &= ~0xfff;
6635 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6636 
6637 		orig = data = RREG32(UVD_CGC_CTRL);
6638 		data &= ~DCM;
6639 		if (orig != data)
6640 			WREG32(UVD_CGC_CTRL, data);
6641 	}
6642 }
6643 
6644 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6645 			       bool enable)
6646 {
6647 	u32 orig, data;
6648 
6649 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6650 
6651 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6652 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6653 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6654 	else
6655 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6656 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6657 
6658 	if (orig != data)
6659 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6660 }
6661 
6662 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6663 				bool enable)
6664 {
6665 	u32 orig, data;
6666 
6667 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6668 
6669 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6670 		data &= ~CLOCK_GATING_DIS;
6671 	else
6672 		data |= CLOCK_GATING_DIS;
6673 
6674 	if (orig != data)
6675 		WREG32(HDP_HOST_PATH_CNTL, data);
6676 }
6677 
6678 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6679 			      bool enable)
6680 {
6681 	u32 orig, data;
6682 
6683 	orig = data = RREG32(HDP_MEM_POWER_LS);
6684 
6685 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6686 		data |= HDP_LS_ENABLE;
6687 	else
6688 		data &= ~HDP_LS_ENABLE;
6689 
6690 	if (orig != data)
6691 		WREG32(HDP_MEM_POWER_LS, data);
6692 }
6693 
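/**
 * cik_update_cg - enable/disable clock gating for a set of blocks
 *
 * @rdev: radeon_device pointer
 * @block: mask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable clock gating
 *
 * Enable or disable clock gating on the selected IP blocks,
 * honoring the per-feature cg_flags (CIK).
 */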
6694 void cik_update_cg(struct radeon_device *rdev,
6695 		   u32 block, bool enable)
6696 {
6697 
6698 	if (block & RADEON_CG_BLOCK_GFX) {
6699 		cik_enable_gui_idle_interrupt(rdev, false);
6700 		/* order matters! */
6701 		if (enable) {
6702 			cik_enable_mgcg(rdev, true);
6703 			cik_enable_cgcg(rdev, true);
6704 		} else {
6705 			cik_enable_cgcg(rdev, false);
6706 			cik_enable_mgcg(rdev, false);
6707 		}
6708 		cik_enable_gui_idle_interrupt(rdev, true);
6709 	}
6710 
6711 	if (block & RADEON_CG_BLOCK_MC) {
6712 		if (!(rdev->flags & RADEON_IS_IGP)) {
6713 			cik_enable_mc_mgcg(rdev, enable);
6714 			cik_enable_mc_ls(rdev, enable);
6715 		}
6716 	}
6717 
6718 	if (block & RADEON_CG_BLOCK_SDMA) {
6719 		cik_enable_sdma_mgcg(rdev, enable);
6720 		cik_enable_sdma_mgls(rdev, enable);
6721 	}
6722 
6723 	if (block & RADEON_CG_BLOCK_BIF) {
6724 		cik_enable_bif_mgls(rdev, enable);
6725 	}
6726 
6727 	if (block & RADEON_CG_BLOCK_UVD) {
6728 		if (rdev->has_uvd)
6729 			cik_enable_uvd_mgcg(rdev, enable);
6730 	}
6731 
6732 	if (block & RADEON_CG_BLOCK_HDP) {
6733 		cik_enable_hdp_mgcg(rdev, enable);
6734 		cik_enable_hdp_ls(rdev, enable);
6735 	}
6736 
6737 	if (block & RADEON_CG_BLOCK_VCE) {
6738 		vce_v2_0_enable_mgcg(rdev, enable);
6739 	}
6740 }
6741 
6742 static void cik_init_cg(struct radeon_device *rdev)
6743 {
6744 
6745 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6746 
6747 	if (rdev->has_uvd)
6748 		si_init_uvd_internal_cg(rdev);
6749 
6750 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6751 			     RADEON_CG_BLOCK_SDMA |
6752 			     RADEON_CG_BLOCK_BIF |
6753 			     RADEON_CG_BLOCK_UVD |
6754 			     RADEON_CG_BLOCK_HDP), true);
6755 }
6756 
6757 static void cik_fini_cg(struct radeon_device *rdev)
6758 {
6759 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6760 			     RADEON_CG_BLOCK_SDMA |
6761 			     RADEON_CG_BLOCK_BIF |
6762 			     RADEON_CG_BLOCK_UVD |
6763 			     RADEON_CG_BLOCK_HDP), false);
6764 
6765 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6766 }
6767 
6768 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6769 					  bool enable)
6770 {
6771 	u32 data, orig;
6772 
6773 	orig = data = RREG32(RLC_PG_CNTL);
6774 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6775 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6776 	else
6777 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6778 	if (orig != data)
6779 		WREG32(RLC_PG_CNTL, data);
6780 }
6781 
6782 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6783 					  bool enable)
6784 {
6785 	u32 data, orig;
6786 
6787 	orig = data = RREG32(RLC_PG_CNTL);
6788 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6789 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6790 	else
6791 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6792 	if (orig != data)
6793 		WREG32(RLC_PG_CNTL, data);
6794 }
6795 
6796 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6797 {
6798 	u32 data, orig;
6799 
6800 	orig = data = RREG32(RLC_PG_CNTL);
6801 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6802 		data &= ~DISABLE_CP_PG;
6803 	else
6804 		data |= DISABLE_CP_PG;
6805 	if (orig != data)
6806 		WREG32(RLC_PG_CNTL, data);
6807 }
6808 
6809 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6810 {
6811 	u32 data, orig;
6812 
6813 	orig = data = RREG32(RLC_PG_CNTL);
6814 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6815 		data &= ~DISABLE_GDS_PG;
6816 	else
6817 		data |= DISABLE_GDS_PG;
6818 	if (orig != data)
6819 		WREG32(RLC_PG_CNTL, data);
6820 }
6821 
6822 #define CP_ME_TABLE_SIZE    96
6823 #define CP_ME_TABLE_OFFSET  2048
6824 #define CP_MEC_TABLE_OFFSET 4096
6825 
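/**
 * cik_init_cp_pg_table - set up the CP power gating jump tables
 *
 * @rdev: radeon_device pointer
 *
 * Copy the per-ME jump tables (CE, PFP, ME, MEC, and MEC2 on Kaveri)
 * from the microcode images into the RLC CP table buffer (CIK).
 */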
6826 void cik_init_cp_pg_table(struct radeon_device *rdev)
6827 {
6828 	volatile u32 *dst_ptr;
6829 	int me, i, max_me = 4;
6830 	u32 bo_offset = 0;
6831 	u32 table_offset, table_size;
6832 
6833 	if (rdev->family == CHIP_KAVERI)
6834 		max_me = 5;
6835 
6836 	if (rdev->rlc.cp_table_ptr == NULL)
6837 		return;
6838 
6839 	/* write the cp table buffer */
6840 	dst_ptr = rdev->rlc.cp_table_ptr;
6841 	for (me = 0; me < max_me; me++) {
6842 		if (rdev->new_fw) {
6843 			const __le32 *fw_data;
6844 			const struct gfx_firmware_header_v1_0 *hdr;
6845 
6846 			if (me == 0) {
6847 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6848 				fw_data = (const __le32 *)
6849 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6850 				table_offset = le32_to_cpu(hdr->jt_offset);
6851 				table_size = le32_to_cpu(hdr->jt_size);
6852 			} else if (me == 1) {
6853 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6854 				fw_data = (const __le32 *)
6855 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6856 				table_offset = le32_to_cpu(hdr->jt_offset);
6857 				table_size = le32_to_cpu(hdr->jt_size);
6858 			} else if (me == 2) {
6859 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6860 				fw_data = (const __le32 *)
6861 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6862 				table_offset = le32_to_cpu(hdr->jt_offset);
6863 				table_size = le32_to_cpu(hdr->jt_size);
6864 			} else if (me == 3) {
6865 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6866 				fw_data = (const __le32 *)
6867 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6868 				table_offset = le32_to_cpu(hdr->jt_offset);
6869 				table_size = le32_to_cpu(hdr->jt_size);
6870 			} else {
6871 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6872 				fw_data = (const __le32 *)
6873 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6874 				table_offset = le32_to_cpu(hdr->jt_offset);
6875 				table_size = le32_to_cpu(hdr->jt_size);
6876 			}
6877 
6878 			for (i = 0; i < table_size; i++) {
6879 				dst_ptr[bo_offset + i] =
6880 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6881 			}
6882 			bo_offset += table_size;
6883 		} else {
6884 			const __be32 *fw_data;
6885 			table_size = CP_ME_TABLE_SIZE;
6886 
6887 			if (me == 0) {
6888 				fw_data = (const __be32 *)rdev->ce_fw->data;
6889 				table_offset = CP_ME_TABLE_OFFSET;
6890 			} else if (me == 1) {
6891 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6892 				table_offset = CP_ME_TABLE_OFFSET;
6893 			} else if (me == 2) {
6894 				fw_data = (const __be32 *)rdev->me_fw->data;
6895 				table_offset = CP_ME_TABLE_OFFSET;
6896 			} else {
6897 				fw_data = (const __be32 *)rdev->mec_fw->data;
6898 				table_offset = CP_MEC_TABLE_OFFSET;
6899 			}
6900 
6901 			for (i = 0; i < table_size; i++) {
6902 				dst_ptr[bo_offset + i] =
6903 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6904 			}
6905 			bo_offset += table_size;
6906 		}
6907 	}
6908 }
6909 
6910 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6911 				bool enable)
6912 {
6913 	u32 data, orig;
6914 
6915 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6916 		orig = data = RREG32(RLC_PG_CNTL);
6917 		data |= GFX_PG_ENABLE;
6918 		if (orig != data)
6919 			WREG32(RLC_PG_CNTL, data);
6920 
6921 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6922 		data |= AUTO_PG_EN;
6923 		if (orig != data)
6924 			WREG32(RLC_AUTO_PG_CTRL, data);
6925 	} else {
6926 		orig = data = RREG32(RLC_PG_CNTL);
6927 		data &= ~GFX_PG_ENABLE;
6928 		if (orig != data)
6929 			WREG32(RLC_PG_CNTL, data);
6930 
6931 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6932 		data &= ~AUTO_PG_EN;
6933 		if (orig != data)
6934 			WREG32(RLC_AUTO_PG_CTRL, data);
6935 
6936 		data = RREG32(DB_RENDER_CONTROL);
6937 	}
6938 }
6939 
6940 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6941 {
6942 	u32 mask = 0, tmp, tmp1;
6943 	int i;
6944 
6945 	mutex_lock(&rdev->grbm_idx_mutex);
6946 	cik_select_se_sh(rdev, se, sh);
6947 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6948 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6949 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6950 	mutex_unlock(&rdev->grbm_idx_mutex);
6951 
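	/* The upper 16 bits of CC_GC_SHADER_ARRAY_CONFIG hold the
	 * fuse-disabled CUs and GC_USER_SHADER_ARRAY_CONFIG the
	 * driver-disabled ones; merge the two, shift them down and
	 * invert to get the bitmap of active CUs for this SE/SH.
	 */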
6952 	tmp &= 0xffff0000;
6953 
6954 	tmp |= tmp1;
6955 	tmp >>= 16;
6956 
6957 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6958 		mask <<= 1;
6959 		mask |= 1;
6960 	}
6961 
6962 	return (~tmp) & mask;
6963 }
6964 
6965 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6966 {
6967 	u32 i, j, k, active_cu_number = 0;
6968 	u32 mask, counter, cu_bitmap;
6969 	u32 tmp = 0;
6970 
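	/* Keep at most two always-on CUs per SH and pack the result
	 * into RLC_PG_AO_CU_MASK: 8 bits per SH, 16 bits per SE.
	 */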
6971 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6972 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6973 			mask = 1;
6974 			cu_bitmap = 0;
6975 			counter = 0;
6976 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6977 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6978 					if (counter < 2)
6979 						cu_bitmap |= mask;
6980 					counter++;
6981 				}
6982 				mask <<= 1;
6983 			}
6984 
6985 			active_cu_number += counter;
6986 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6987 		}
6988 	}
6989 
6990 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6991 
6992 	tmp = RREG32(RLC_MAX_PG_CU);
6993 	tmp &= ~MAX_PU_CU_MASK;
6994 	tmp |= MAX_PU_CU(active_cu_number);
6995 	WREG32(RLC_MAX_PG_CU, tmp);
6996 }
6997 
6998 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6999 				       bool enable)
7000 {
7001 	u32 data, orig;
7002 
7003 	orig = data = RREG32(RLC_PG_CNTL);
7004 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
7005 		data |= STATIC_PER_CU_PG_ENABLE;
7006 	else
7007 		data &= ~STATIC_PER_CU_PG_ENABLE;
7008 	if (orig != data)
7009 		WREG32(RLC_PG_CNTL, data);
7010 }
7011 
7012 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
7013 					bool enable)
7014 {
7015 	u32 data, orig;
7016 
7017 	orig = data = RREG32(RLC_PG_CNTL);
7018 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
7019 		data |= DYN_PER_CU_PG_ENABLE;
7020 	else
7021 		data &= ~DYN_PER_CU_PG_ENABLE;
7022 	if (orig != data)
7023 		WREG32(RLC_PG_CNTL, data);
7024 }
7025 
7026 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
7027 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
7028 
7029 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
7030 {
7031 	u32 data, orig;
7032 	u32 i;
7033 
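	/* program the clear state descriptor (address and size) and the
	 * save/restore register list into the RLC GPM scratch space
	 */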
7034 	if (rdev->rlc.cs_data) {
7035 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7036 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
7037 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
7038 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
7039 	} else {
7040 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7041 		for (i = 0; i < 3; i++)
7042 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
7043 	}
7044 	if (rdev->rlc.reg_list) {
7045 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
7046 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
7047 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
7048 	}
7049 
7050 	orig = data = RREG32(RLC_PG_CNTL);
7051 	data |= GFX_PG_SRC;
7052 	if (orig != data)
7053 		WREG32(RLC_PG_CNTL, data);
7054 
7055 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
7056 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
7057 
7058 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
7059 	data &= ~IDLE_POLL_COUNT_MASK;
7060 	data |= IDLE_POLL_COUNT(0x60);
7061 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
7062 
7063 	data = 0x10101010;
7064 	WREG32(RLC_PG_DELAY, data);
7065 
7066 	data = RREG32(RLC_PG_DELAY_2);
7067 	data &= ~0xff;
7068 	data |= 0x3;
7069 	WREG32(RLC_PG_DELAY_2, data);
7070 
7071 	data = RREG32(RLC_AUTO_PG_CTRL);
7072 	data &= ~GRBM_REG_SGIT_MASK;
7073 	data |= GRBM_REG_SGIT(0x700);
7074 	WREG32(RLC_AUTO_PG_CTRL, data);
7075 
7076 }
7077 
7078 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
7079 {
7080 	cik_enable_gfx_cgpg(rdev, enable);
7081 	cik_enable_gfx_static_mgpg(rdev, enable);
7082 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
7083 }
7084 
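/**
 * cik_get_csb_size - get the size of the clear state buffer
 *
 * @rdev: radeon_device pointer
 *
 * Returns the number of dwords needed for the clear state buffer
 * filled in by cik_get_csb_buffer(), or 0 if there is no clear
 * state data (CIK).
 */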
7085 u32 cik_get_csb_size(struct radeon_device *rdev)
7086 {
7087 	u32 count = 0;
7088 	const struct cs_section_def *sect = NULL;
7089 	const struct cs_extent_def *ext = NULL;
7090 
7091 	if (rdev->rlc.cs_data == NULL)
7092 		return 0;
7093 
7094 	/* begin clear state */
7095 	count += 2;
7096 	/* context control state */
7097 	count += 3;
7098 
7099 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7100 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7101 			if (sect->id == SECT_CONTEXT)
7102 				count += 2 + ext->reg_count;
7103 			else
7104 				return 0;
7105 		}
7106 	}
7107 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7108 	count += 4;
7109 	/* end clear state */
7110 	count += 2;
7111 	/* clear state */
7112 	count += 2;
7113 
7114 	return count;
7115 }
7116 
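/**
 * cik_get_csb_buffer - fill in the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: buffer of at least cik_get_csb_size() dwords
 *
 * Build the PACKET3 stream that loads the golden context register
 * state, including the asic-specific PA_SC_RASTER_CONFIG values (CIK).
 */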
7117 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7118 {
7119 	u32 count = 0, i;
7120 	const struct cs_section_def *sect = NULL;
7121 	const struct cs_extent_def *ext = NULL;
7122 
7123 	if (rdev->rlc.cs_data == NULL)
7124 		return;
7125 	if (buffer == NULL)
7126 		return;
7127 
7128 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7129 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7130 
7131 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7132 	buffer[count++] = cpu_to_le32(0x80000000);
7133 	buffer[count++] = cpu_to_le32(0x80000000);
7134 
7135 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7136 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7137 			if (sect->id == SECT_CONTEXT) {
7138 				buffer[count++] =
7139 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7140 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7141 				for (i = 0; i < ext->reg_count; i++)
7142 					buffer[count++] = cpu_to_le32(ext->extent[i]);
7143 			} else {
7144 				return;
7145 			}
7146 		}
7147 	}
7148 
7149 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7150 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7151 	switch (rdev->family) {
7152 	case CHIP_BONAIRE:
7153 		buffer[count++] = cpu_to_le32(0x16000012);
7154 		buffer[count++] = cpu_to_le32(0x00000000);
7155 		break;
7156 	case CHIP_KAVERI:
7157 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7158 		buffer[count++] = cpu_to_le32(0x00000000);
7159 		break;
7160 	case CHIP_KABINI:
7161 	case CHIP_MULLINS:
7162 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7163 		buffer[count++] = cpu_to_le32(0x00000000);
7164 		break;
7165 	case CHIP_HAWAII:
7166 		buffer[count++] = cpu_to_le32(0x3a00161a);
7167 		buffer[count++] = cpu_to_le32(0x0000002e);
7168 		break;
7169 	default:
7170 		buffer[count++] = cpu_to_le32(0x00000000);
7171 		buffer[count++] = cpu_to_le32(0x00000000);
7172 		break;
7173 	}
7174 
7175 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7176 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7177 
7178 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7179 	buffer[count++] = cpu_to_le32(0);
7180 }
7181 
7182 static void cik_init_pg(struct radeon_device *rdev)
7183 {
7184 	if (rdev->pg_flags) {
7185 		cik_enable_sck_slowdown_on_pu(rdev, true);
7186 		cik_enable_sck_slowdown_on_pd(rdev, true);
7187 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7188 			cik_init_gfx_cgpg(rdev);
7189 			cik_enable_cp_pg(rdev, true);
7190 			cik_enable_gds_pg(rdev, true);
7191 		}
7192 		cik_init_ao_cu_mask(rdev);
7193 		cik_update_gfx_pg(rdev, true);
7194 	}
7195 }
7196 
7197 static void cik_fini_pg(struct radeon_device *rdev)
7198 {
7199 	if (rdev->pg_flags) {
7200 		cik_update_gfx_pg(rdev, false);
7201 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7202 			cik_enable_cp_pg(rdev, false);
7203 			cik_enable_gds_pg(rdev, false);
7204 		}
7205 	}
7206 }
7207 
7208 /*
7209  * Interrupts
7210  * Starting with r6xx, interrupts are handled via a ring buffer.
7211  * Ring buffers are areas of GPU accessible memory that the GPU
7212  * writes interrupt vectors into and the host reads vectors out of.
7213  * There is a rptr (read pointer) that determines where the
7214  * host is currently reading, and a wptr (write pointer)
7215  * which determines where the GPU has written.  When the
7216  * pointers are equal, the ring is idle.  When the GPU
7217  * writes vectors to the ring buffer, it increments the
7218  * wptr.  When there is an interrupt, the host then starts
7219  * fetching vectors and processing them until the pointers are
7220  * equal again at which point it updates the rptr.
7221  */
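/*
 * In rough pseudocode, the consumer side implemented by
 * cik_irq_process() below amounts to:
 *
 *	wptr = cik_get_ih_wptr(rdev);
 *	while (rptr != wptr) {
 *		handle_vector(&ih.ring[rptr / 4]);
 *		rptr = (rptr + 16) & ih.ptr_mask;   (16-byte vectors)
 *	}
 *	WREG32(IH_RB_RPTR, rptr);
 */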
7222 
7223 /**
7224  * cik_enable_interrupts - Enable the interrupt ring buffer
7225  *
7226  * @rdev: radeon_device pointer
7227  *
7228  * Enable the interrupt ring buffer (CIK).
7229  */
7230 static void cik_enable_interrupts(struct radeon_device *rdev)
7231 {
7232 	u32 ih_cntl = RREG32(IH_CNTL);
7233 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7234 
7235 	ih_cntl |= ENABLE_INTR;
7236 	ih_rb_cntl |= IH_RB_ENABLE;
7237 	WREG32(IH_CNTL, ih_cntl);
7238 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7239 	rdev->ih.enabled = true;
7240 }
7241 
7242 /**
7243  * cik_disable_interrupts - Disable the interrupt ring buffer
7244  *
7245  * @rdev: radeon_device pointer
7246  *
7247  * Disable the interrupt ring buffer (CIK).
7248  */
7249 static void cik_disable_interrupts(struct radeon_device *rdev)
7250 {
7251 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7252 	u32 ih_cntl = RREG32(IH_CNTL);
7253 
7254 	ih_rb_cntl &= ~IH_RB_ENABLE;
7255 	ih_cntl &= ~ENABLE_INTR;
7256 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7257 	WREG32(IH_CNTL, ih_cntl);
7258 	/* set rptr, wptr to 0 */
7259 	WREG32(IH_RB_RPTR, 0);
7260 	WREG32(IH_RB_WPTR, 0);
7261 	rdev->ih.enabled = false;
7262 	rdev->ih.rptr = 0;
7263 }
7264 
7265 /**
7266  * cik_disable_interrupt_state - Disable all interrupt sources
7267  *
7268  * @rdev: radeon_device pointer
7269  *
7270  * Clear all interrupt enable bits used by the driver (CIK).
7271  */
7272 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7273 {
7274 	u32 tmp;
7275 
7276 	/* gfx ring */
7277 	tmp = RREG32(CP_INT_CNTL_RING0) &
7278 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7279 	WREG32(CP_INT_CNTL_RING0, tmp);
7280 	/* sdma */
7281 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7282 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7283 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7284 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7285 	/* compute queues */
7286 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7287 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7288 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7289 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7290 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7291 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7292 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7293 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7294 	/* grbm */
7295 	WREG32(GRBM_INT_CNTL, 0);
7296 	/* SRBM */
7297 	WREG32(SRBM_INT_CNTL, 0);
7298 	/* vline/vblank, etc. */
7299 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7300 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7301 	if (rdev->num_crtc >= 4) {
7302 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7303 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7304 	}
7305 	if (rdev->num_crtc >= 6) {
7306 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7307 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7308 	}
7309 	/* pflip */
7310 	if (rdev->num_crtc >= 2) {
7311 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7312 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7313 	}
7314 	if (rdev->num_crtc >= 4) {
7315 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7316 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7317 	}
7318 	if (rdev->num_crtc >= 6) {
7319 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7320 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7321 	}
7322 
7323 	/* dac hotplug */
7324 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7325 
7326 	/* digital hotplug */
7327 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7328 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7329 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7330 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7331 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7332 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7333 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7334 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7335 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7336 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7337 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7338 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7339 
7340 }
7341 
7342 /**
7343  * cik_irq_init - init and enable the interrupt ring
7344  *
7345  * @rdev: radeon_device pointer
7346  *
7347  * Allocate a ring buffer for the interrupt controller,
7348  * enable the RLC, disable interrupts, set up the IH
7349  * ring buffer and enable it (CIK).
7350  * Called at device load and resume.
7351  * Returns 0 for success, errors for failure.
7352  */
7353 static int cik_irq_init(struct radeon_device *rdev)
7354 {
7355 	int ret = 0;
7356 	int rb_bufsz;
7357 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7358 
7359 	/* allocate ring */
7360 	ret = r600_ih_ring_alloc(rdev);
7361 	if (ret)
7362 		return ret;
7363 
7364 	/* disable irqs */
7365 	cik_disable_interrupts(rdev);
7366 
7367 	/* init rlc */
7368 	ret = cik_rlc_resume(rdev);
7369 	if (ret) {
7370 		r600_ih_ring_fini(rdev);
7371 		return ret;
7372 	}
7373 
7374 	/* setup interrupt control */
7375 	/* set dummy read address to dummy page address */
7376 	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
7377 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7378 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7379 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7380 	 */
7381 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7382 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7383 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7384 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7385 
7386 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
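	/* the ring size is programmed as log2 of the size in dwords;
	 * the size field sits just above the IH_RB_ENABLE bit, hence
	 * the << 1 below
	 */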
7387 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7388 
7389 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7390 		      IH_WPTR_OVERFLOW_CLEAR |
7391 		      (rb_bufsz << 1));
7392 
7393 	if (rdev->wb.enabled)
7394 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7395 
7396 	/* set the writeback address whether it's enabled or not */
7397 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7398 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7399 
7400 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7401 
7402 	/* set rptr, wptr to 0 */
7403 	WREG32(IH_RB_RPTR, 0);
7404 	WREG32(IH_RB_WPTR, 0);
7405 
7406 	/* Default settings for IH_CNTL (disabled at first) */
7407 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7408 	/* RPTR_REARM only works if msi's are enabled */
7409 	if (rdev->msi_enabled)
7410 		ih_cntl |= RPTR_REARM;
7411 	WREG32(IH_CNTL, ih_cntl);
7412 
7413 	/* force the active interrupt state to all disabled */
7414 	cik_disable_interrupt_state(rdev);
7415 
7416 	pci_set_master(rdev->pdev);
7417 
7418 	/* enable irqs */
7419 	cik_enable_interrupts(rdev);
7420 
7421 	return ret;
7422 }
7423 
7424 /**
7425  * cik_irq_set - enable/disable interrupt sources
7426  *
7427  * @rdev: radeon_device pointer
7428  *
7429  * Enable interrupt sources on the GPU (vblanks, hpd,
7430  * etc.) (CIK).
7431  * Returns 0 for success, errors for failure.
7432  */
7433 int cik_irq_set(struct radeon_device *rdev)
7434 {
7435 	u32 cp_int_cntl;
7436 	u32 cp_m1p0;
7437 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7438 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7439 	u32 grbm_int_cntl = 0;
7440 	u32 dma_cntl, dma_cntl1;
7441 
7442 	if (!rdev->irq.installed) {
7443 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7444 		return -EINVAL;
7445 	}
7446 	/* don't enable anything if the ih is disabled */
7447 	if (!rdev->ih.enabled) {
7448 		cik_disable_interrupts(rdev);
7449 		/* force the active interrupt state to all disabled */
7450 		cik_disable_interrupt_state(rdev);
7451 		return 0;
7452 	}
7453 
7454 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7455 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7456 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7457 
7458 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7459 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7460 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7461 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7462 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7463 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7464 
7465 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7466 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7467 
7468 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7469 
7470 	/* enable CP interrupts on all rings */
7471 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7472 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7473 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7474 	}
7475 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7476 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7477 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7478 		if (ring->me == 1) {
7479 			switch (ring->pipe) {
7480 			case 0:
7481 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7482 				break;
7483 			default:
7484 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7485 				break;
7486 			}
7487 		} else {
7488 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7489 		}
7490 	}
7491 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7492 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7493 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7494 		if (ring->me == 1) {
7495 			switch (ring->pipe) {
7496 			case 0:
7497 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7498 				break;
7499 			default:
7500 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7501 				break;
7502 			}
7503 		} else {
7504 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7505 		}
7506 	}
7507 
7508 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7509 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7510 		dma_cntl |= TRAP_ENABLE;
7511 	}
7512 
7513 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7514 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7515 		dma_cntl1 |= TRAP_ENABLE;
7516 	}
7517 
7518 	if (rdev->irq.crtc_vblank_int[0] ||
7519 	    atomic_read(&rdev->irq.pflip[0])) {
7520 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7521 		crtc1 |= VBLANK_INTERRUPT_MASK;
7522 	}
7523 	if (rdev->irq.crtc_vblank_int[1] ||
7524 	    atomic_read(&rdev->irq.pflip[1])) {
7525 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7526 		crtc2 |= VBLANK_INTERRUPT_MASK;
7527 	}
7528 	if (rdev->irq.crtc_vblank_int[2] ||
7529 	    atomic_read(&rdev->irq.pflip[2])) {
7530 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7531 		crtc3 |= VBLANK_INTERRUPT_MASK;
7532 	}
7533 	if (rdev->irq.crtc_vblank_int[3] ||
7534 	    atomic_read(&rdev->irq.pflip[3])) {
7535 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7536 		crtc4 |= VBLANK_INTERRUPT_MASK;
7537 	}
7538 	if (rdev->irq.crtc_vblank_int[4] ||
7539 	    atomic_read(&rdev->irq.pflip[4])) {
7540 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7541 		crtc5 |= VBLANK_INTERRUPT_MASK;
7542 	}
7543 	if (rdev->irq.crtc_vblank_int[5] ||
7544 	    atomic_read(&rdev->irq.pflip[5])) {
7545 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7546 		crtc6 |= VBLANK_INTERRUPT_MASK;
7547 	}
7548 	if (rdev->irq.hpd[0]) {
7549 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7550 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7551 	}
7552 	if (rdev->irq.hpd[1]) {
7553 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7554 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7555 	}
7556 	if (rdev->irq.hpd[2]) {
7557 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7558 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7559 	}
7560 	if (rdev->irq.hpd[3]) {
7561 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7562 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7563 	}
7564 	if (rdev->irq.hpd[4]) {
7565 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7566 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7567 	}
7568 	if (rdev->irq.hpd[5]) {
7569 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7570 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7571 	}
7572 
7573 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7574 
7575 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7576 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7577 
7578 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7579 
7580 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7581 
7582 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7583 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7584 	if (rdev->num_crtc >= 4) {
7585 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7586 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7587 	}
7588 	if (rdev->num_crtc >= 6) {
7589 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7590 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7591 	}
7592 
7593 	if (rdev->num_crtc >= 2) {
7594 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7595 		       GRPH_PFLIP_INT_MASK);
7596 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7597 		       GRPH_PFLIP_INT_MASK);
7598 	}
7599 	if (rdev->num_crtc >= 4) {
7600 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7601 		       GRPH_PFLIP_INT_MASK);
7602 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7603 		       GRPH_PFLIP_INT_MASK);
7604 	}
7605 	if (rdev->num_crtc >= 6) {
7606 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7607 		       GRPH_PFLIP_INT_MASK);
7608 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7609 		       GRPH_PFLIP_INT_MASK);
7610 	}
7611 
7612 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7613 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7614 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7615 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7616 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7617 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7618 
7619 	/* posting read */
7620 	RREG32(SRBM_STATUS);
7621 
7622 	return 0;
7623 }
7624 
7625 /**
7626  * cik_irq_ack - ack interrupt sources
7627  *
7628  * @rdev: radeon_device pointer
7629  *
7630  * Ack interrupt sources on the GPU (vblanks, hpd,
7631  * etc.) (CIK).  Certain interrupt sources are sw
7632  * generated and do not require an explicit ack.
7633  */
7634 static inline void cik_irq_ack(struct radeon_device *rdev)
7635 {
7636 	u32 tmp;
7637 
7638 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7639 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7640 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7641 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7642 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7643 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7644 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7645 
7646 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7647 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7648 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7649 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7650 	if (rdev->num_crtc >= 4) {
7651 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7652 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7653 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7654 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7655 	}
7656 	if (rdev->num_crtc >= 6) {
7657 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7658 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7659 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7660 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7661 	}
7662 
7663 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7664 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7665 		       GRPH_PFLIP_INT_CLEAR);
7666 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7667 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7668 		       GRPH_PFLIP_INT_CLEAR);
7669 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7670 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7671 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7672 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7673 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7674 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7675 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7676 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7677 
7678 	if (rdev->num_crtc >= 4) {
7679 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7680 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7681 			       GRPH_PFLIP_INT_CLEAR);
7682 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7683 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7684 			       GRPH_PFLIP_INT_CLEAR);
7685 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7686 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7687 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7688 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7689 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7690 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7691 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7692 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7693 	}
7694 
7695 	if (rdev->num_crtc >= 6) {
7696 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7697 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7698 			       GRPH_PFLIP_INT_CLEAR);
7699 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7700 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7701 			       GRPH_PFLIP_INT_CLEAR);
7702 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7703 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7704 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7705 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7706 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7707 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7708 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7709 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7710 	}
7711 
7712 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7713 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7714 		tmp |= DC_HPDx_INT_ACK;
7715 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7716 	}
7717 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7718 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7719 		tmp |= DC_HPDx_INT_ACK;
7720 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7721 	}
7722 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7723 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7724 		tmp |= DC_HPDx_INT_ACK;
7725 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7726 	}
7727 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7728 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7729 		tmp |= DC_HPDx_INT_ACK;
7730 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7731 	}
7732 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7733 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7734 		tmp |= DC_HPDx_INT_ACK;
7735 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7736 	}
7737 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7738 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7739 		tmp |= DC_HPDx_INT_ACK;
7740 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7741 	}
7742 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7743 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7744 		tmp |= DC_HPDx_RX_INT_ACK;
7745 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7746 	}
7747 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7748 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7749 		tmp |= DC_HPDx_RX_INT_ACK;
7750 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7751 	}
7752 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7753 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7754 		tmp |= DC_HPDx_RX_INT_ACK;
7755 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7756 	}
7757 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7758 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7759 		tmp |= DC_HPDx_RX_INT_ACK;
7760 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7761 	}
7762 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7763 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7764 		tmp |= DC_HPDx_RX_INT_ACK;
7765 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7766 	}
7767 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7768 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7769 		tmp |= DC_HPDx_RX_INT_ACK;
7770 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7771 	}
7772 }
7773 
7774 /**
7775  * cik_irq_disable - disable interrupts
7776  *
7777  * @rdev: radeon_device pointer
7778  *
7779  * Disable interrupts on the hw (CIK).
7780  */
7781 static void cik_irq_disable(struct radeon_device *rdev)
7782 {
7783 	cik_disable_interrupts(rdev);
7784 	/* Wait and acknowledge irq */
7785 	mdelay(1);
7786 	cik_irq_ack(rdev);
7787 	cik_disable_interrupt_state(rdev);
7788 }
7789 
7790 /**
7791  * cik_irq_suspend - disable interrupts for suspend
7792  *
7793  * @rdev: radeon_device pointer
7794  *
7795  * Disable interrupts and stop the RLC (CIK).
7796  * Used for suspend.
7797  */
7798 static void cik_irq_suspend(struct radeon_device *rdev)
7799 {
7800 	cik_irq_disable(rdev);
7801 	cik_rlc_stop(rdev);
7802 }
7803 
7804 /**
7805  * cik_irq_fini - tear down interrupt support
7806  *
7807  * @rdev: radeon_device pointer
7808  *
7809  * Disable interrupts on the hw and free the IH ring
7810  * buffer (CIK).
7811  * Used for driver unload.
7812  */
7813 static void cik_irq_fini(struct radeon_device *rdev)
7814 {
7815 	cik_irq_suspend(rdev);
7816 	r600_ih_ring_fini(rdev);
7817 }
7818 
7819 /**
7820  * cik_get_ih_wptr - get the IH ring buffer wptr
7821  *
7822  * @rdev: radeon_device pointer
7823  *
7824  * Get the IH ring buffer wptr from either the register
7825  * or the writeback memory buffer (CIK).  Also check for
7826  * ring buffer overflow and deal with it.
7827  * Used by cik_irq_process().
7828  * Returns the value of the wptr.
7829  */
7830 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7831 {
7832 	u32 wptr, tmp;
7833 
7834 	if (rdev->wb.enabled)
7835 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7836 	else
7837 		wptr = RREG32(IH_RB_WPTR);
7838 
7839 	if (wptr & RB_OVERFLOW) {
7840 		wptr &= ~RB_OVERFLOW;
7841 		/* When a ring buffer overflow happens, start parsing interrupts
7842 		 * from the last vector that was not overwritten (wptr + 16).
7843 		 * Hopefully this allows us to catch up.
7844 		 */
7845 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7846 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7847 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7848 		tmp = RREG32(IH_RB_CNTL);
7849 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7850 		WREG32(IH_RB_CNTL, tmp);
7851 	}
7852 	return (wptr & rdev->ih.ptr_mask);
7853 }
7854 
7855 /*        CIK IV Ring
7856  * Each IV ring entry is 128 bits:
7857  * [7:0]    - interrupt source id
7858  * [31:8]   - reserved
7859  * [59:32]  - interrupt source data
7860  * [63:60]  - reserved
7861  * [71:64]  - RINGID
7862  *            CP:
7863  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7864  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7865  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7866  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7867  *            PIPE_ID - ME0 0=3D
7868  *                    - ME1&2 compute dispatcher (4 pipes each)
7869  *            SDMA:
7870  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7871  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7872  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7873  * [79:72]  - VMID
7874  * [95:80]  - PASID
7875  * [127:96] - reserved
7876  */
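/*
 * The handler below parses the first three dwords of each vector:
 *
 *	src_id   = ring[n]     & 0xff;        bits [7:0]
 *	src_data = ring[n + 1] & 0xfffffff;   bits [59:32]
 *	ring_id  = ring[n + 2] & 0xff;        bits [71:64]
 */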
7877 /**
7878  * cik_irq_process - interrupt handler
7879  *
7880  * @rdev: radeon_device pointer
7881  *
7882  * Interrupt handler (CIK).  Walk the IH ring,
7883  * ack interrupts and schedule work to handle
7884  * interrupt events.
7885  * Returns irq process return code.
7886  */
7887 int cik_irq_process(struct radeon_device *rdev)
7888 {
7889 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7890 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7891 	u32 wptr;
7892 	u32 rptr;
7893 	u32 src_id, src_data, ring_id;
7894 	u8 me_id, pipe_id, queue_id;
7895 	u32 ring_index;
7896 	bool queue_hotplug = false;
7897 	bool queue_dp = false;
7898 	bool queue_reset = false;
7899 	u32 addr, status, mc_client;
7900 	bool queue_thermal = false;
7901 
7902 	if (!rdev->ih.enabled || rdev->shutdown)
7903 		return IRQ_NONE;
7904 
7905 	wptr = cik_get_ih_wptr(rdev);
7906 
7907 restart_ih:
7908 	/* is somebody else already processing irqs? */
7909 	if (atomic_xchg(&rdev->ih.lock, 1))
7910 		return IRQ_NONE;
7911 
7912 	rptr = rdev->ih.rptr;
7913 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7914 
7915 	/* Order reading of wptr vs. reading of IH ring data */
7916 	rmb();
7917 
7918 	/* display interrupts */
7919 	cik_irq_ack(rdev);
7920 
7921 	while (rptr != wptr) {
7922 		/* wptr/rptr are in bytes! */
7923 		ring_index = rptr / 4;
7924 
7925 		radeon_kfd_interrupt(rdev,
7926 				(const void *) &rdev->ih.ring[ring_index]);
7927 
7928 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7929 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7930 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7931 
7932 		switch (src_id) {
7933 		case 1: /* D1 vblank/vline */
7934 			switch (src_data) {
7935 			case 0: /* D1 vblank */
7936 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7937 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7938 
7939 				if (rdev->irq.crtc_vblank_int[0]) {
7940 					drm_handle_vblank(rdev->ddev, 0);
7941 					rdev->pm.vblank_sync = true;
7942 					wake_up(&rdev->irq.vblank_queue);
7943 				}
7944 				if (atomic_read(&rdev->irq.pflip[0]))
7945 					radeon_crtc_handle_vblank(rdev, 0);
7946 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7947 				DRM_DEBUG("IH: D1 vblank\n");
7948 
7949 				break;
7950 			case 1: /* D1 vline */
7951 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7952 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7953 
7954 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7955 				DRM_DEBUG("IH: D1 vline\n");
7956 
7957 				break;
7958 			default:
7959 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7960 				break;
7961 			}
7962 			break;
7963 		case 2: /* D2 vblank/vline */
7964 			switch (src_data) {
7965 			case 0: /* D2 vblank */
7966 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7967 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7968 
7969 				if (rdev->irq.crtc_vblank_int[1]) {
7970 					drm_handle_vblank(rdev->ddev, 1);
7971 					rdev->pm.vblank_sync = true;
7972 					wake_up(&rdev->irq.vblank_queue);
7973 				}
7974 				if (atomic_read(&rdev->irq.pflip[1]))
7975 					radeon_crtc_handle_vblank(rdev, 1);
7976 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7977 				DRM_DEBUG("IH: D2 vblank\n");
7978 
7979 				break;
7980 			case 1: /* D2 vline */
7981 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7982 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7983 
7984 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7985 				DRM_DEBUG("IH: D2 vline\n");
7986 
7987 				break;
7988 			default:
7989 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7990 				break;
7991 			}
7992 			break;
7993 		case 3: /* D3 vblank/vline */
7994 			switch (src_data) {
7995 			case 0: /* D3 vblank */
7996 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7997 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7998 
7999 				if (rdev->irq.crtc_vblank_int[2]) {
8000 					drm_handle_vblank(rdev->ddev, 2);
8001 					rdev->pm.vblank_sync = true;
8002 					wake_up(&rdev->irq.vblank_queue);
8003 				}
8004 				if (atomic_read(&rdev->irq.pflip[2]))
8005 					radeon_crtc_handle_vblank(rdev, 2);
8006 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
8007 				DRM_DEBUG("IH: D3 vblank\n");
8008 
8009 				break;
8010 			case 1: /* D3 vline */
8011 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
8012 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8013 
8014 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8015 				DRM_DEBUG("IH: D3 vline\n");
8016 
8017 				break;
8018 			default:
8019 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8020 				break;
8021 			}
8022 			break;
8023 		case 4: /* D4 vblank/vline */
8024 			switch (src_data) {
8025 			case 0: /* D4 vblank */
8026 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
8027 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8028 
8029 				if (rdev->irq.crtc_vblank_int[3]) {
8030 					drm_handle_vblank(rdev->ddev, 3);
8031 					rdev->pm.vblank_sync = true;
8032 					wake_up(&rdev->irq.vblank_queue);
8033 				}
8034 				if (atomic_read(&rdev->irq.pflip[3]))
8035 					radeon_crtc_handle_vblank(rdev, 3);
8036 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8037 				DRM_DEBUG("IH: D4 vblank\n");
8038 
8039 				break;
8040 			case 1: /* D4 vline */
8041 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
8042 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8043 
8044 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8045 				DRM_DEBUG("IH: D4 vline\n");
8046 
8047 				break;
8048 			default:
8049 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8050 				break;
8051 			}
8052 			break;
8053 		case 5: /* D5 vblank/vline */
8054 			switch (src_data) {
8055 			case 0: /* D5 vblank */
8056 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
8057 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8058 
8059 				if (rdev->irq.crtc_vblank_int[4]) {
8060 					drm_handle_vblank(rdev->ddev, 4);
8061 					rdev->pm.vblank_sync = true;
8062 					wake_up(&rdev->irq.vblank_queue);
8063 				}
8064 				if (atomic_read(&rdev->irq.pflip[4]))
8065 					radeon_crtc_handle_vblank(rdev, 4);
8066 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8067 				DRM_DEBUG("IH: D5 vblank\n");
8068 
8069 				break;
8070 			case 1: /* D5 vline */
8071 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
8072 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8073 
8074 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8075 				DRM_DEBUG("IH: D5 vline\n");
8076 
8077 				break;
8078 			default:
8079 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8080 				break;
8081 			}
8082 			break;
8083 		case 6: /* D6 vblank/vline */
8084 			switch (src_data) {
8085 			case 0: /* D6 vblank */
8086 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
8087 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8088 
8089 				if (rdev->irq.crtc_vblank_int[5]) {
8090 					drm_handle_vblank(rdev->ddev, 5);
8091 					rdev->pm.vblank_sync = true;
8092 					wake_up(&rdev->irq.vblank_queue);
8093 				}
8094 				if (atomic_read(&rdev->irq.pflip[5]))
8095 					radeon_crtc_handle_vblank(rdev, 5);
8096 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8097 				DRM_DEBUG("IH: D6 vblank\n");
8098 
8099 				break;
8100 			case 1: /* D6 vline */
8101 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
8102 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8103 
8104 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8105 				DRM_DEBUG("IH: D6 vline\n");
8106 
8107 				break;
8108 			default:
8109 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8110 				break;
8111 			}
8112 			break;
8113 		case 8: /* D1 page flip */
8114 		case 10: /* D2 page flip */
8115 		case 12: /* D3 page flip */
8116 		case 14: /* D4 page flip */
8117 		case 16: /* D5 page flip */
8118 		case 18: /* D6 page flip */
8119 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8120 			if (radeon_use_pflipirq > 0)
8121 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8122 			break;
8123 		case 42: /* HPD hotplug */
8124 			switch (src_data) {
8125 			case 0:
8126 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
8127 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8128 
8129 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8130 				queue_hotplug = true;
8131 				DRM_DEBUG("IH: HPD1\n");
8132 
8133 				break;
8134 			case 1:
8135 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
8136 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8137 
8138 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8139 				queue_hotplug = true;
8140 				DRM_DEBUG("IH: HPD2\n");
8141 
8142 				break;
8143 			case 2:
8144 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
8145 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8146 
8147 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8148 				queue_hotplug = true;
8149 				DRM_DEBUG("IH: HPD3\n");
8150 
8151 				break;
8152 			case 3:
8153 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
8154 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8155 
8156 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8157 				queue_hotplug = true;
8158 				DRM_DEBUG("IH: HPD4\n");
8159 
8160 				break;
8161 			case 4:
8162 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
8163 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8164 
8165 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8166 				queue_hotplug = true;
8167 				DRM_DEBUG("IH: HPD5\n");
8168 
8169 				break;
8170 			case 5:
8171 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
8172 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8173 
8174 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8175 				queue_hotplug = true;
8176 				DRM_DEBUG("IH: HPD6\n");
8177 
8178 				break;
8179 			case 6:
8180 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
8181 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8182 
8183 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8184 				queue_dp = true;
8185 				DRM_DEBUG("IH: HPD_RX 1\n");
8186 
8187 				break;
8188 			case 7:
8189 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
8190 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8191 
8192 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8193 				queue_dp = true;
8194 				DRM_DEBUG("IH: HPD_RX 2\n");
8195 
8196 				break;
8197 			case 8:
8198 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
8199 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8200 
8201 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8202 				queue_dp = true;
8203 				DRM_DEBUG("IH: HPD_RX 3\n");
8204 
8205 				break;
8206 			case 9:
8207 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
8208 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8209 
8210 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8211 				queue_dp = true;
8212 				DRM_DEBUG("IH: HPD_RX 4\n");
8213 
8214 				break;
8215 			case 10:
8216 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
8217 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8218 
8219 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8220 				queue_dp = true;
8221 				DRM_DEBUG("IH: HPD_RX 5\n");
8222 
8223 				break;
8224 			case 11:
8225 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
8226 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8227 
8228 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8229 				queue_dp = true;
8230 				DRM_DEBUG("IH: HPD_RX 6\n");
8231 
8232 				break;
8233 			default:
8234 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8235 				break;
8236 			}
8237 			break;
8238 		case 96:
8239 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8240 			WREG32(SRBM_INT_ACK, 0x1);
8241 			break;
8242 		case 124: /* UVD */
8243 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8244 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8245 			break;
8246 		case 146:
8247 		case 147:
8248 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8249 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8250 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8251 			/* reset addr and status */
8252 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8253 			if (addr == 0x0 && status == 0x0)
8254 				break;
8255 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8256 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8257 				addr);
8258 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8259 				status);
8260 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8261 			break;
8262 		case 167: /* VCE */
8263 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8264 			switch (src_data) {
8265 			case 0:
8266 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8267 				break;
8268 			case 1:
8269 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8270 				break;
8271 			default:
8272 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8273 				break;
8274 			}
8275 			break;
8276 		case 176: /* GFX RB CP_INT */
8277 		case 177: /* GFX IB CP_INT */
8278 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8279 			break;
8280 		case 181: /* CP EOP event */
8281 			DRM_DEBUG("IH: CP EOP\n");
8282 			/* XXX check the bitfield order! */
8283 			me_id = (ring_id & 0x60) >> 5;
8284 			pipe_id = (ring_id & 0x18) >> 3;
8285 			queue_id = (ring_id & 0x7) >> 0;
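			/* worked example, assuming the encoding above is correct:
			 * ring_id 0x2b decodes as me_id = (0x2b & 0x60) >> 5 = 1,
			 * pipe_id = (0x2b & 0x18) >> 3 = 1, queue_id = 0x2b & 0x7 = 3,
			 * i.e. the fence came from MEC ME1, pipe 1, queue 3.
			 */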
8286 			switch (me_id) {
8287 			case 0:
8288 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8289 				break;
8290 			case 1:
8291 			case 2:
8292 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8293 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8294 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8295 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8296 				break;
8297 			}
8298 			break;
8299 		case 184: /* CP Privileged reg access */
8300 			DRM_ERROR("Illegal register access in command stream\n");
8301 			/* XXX check the bitfield order! */
8302 			me_id = (ring_id & 0x60) >> 5;
8303 			pipe_id = (ring_id & 0x18) >> 3;
8304 			queue_id = (ring_id & 0x7) >> 0;
8305 			switch (me_id) {
8306 			case 0:
8307 				/* This results in a full GPU reset, but all we need to do is soft
8308 				 * reset the CP for gfx
8309 				 */
8310 				queue_reset = true;
8311 				break;
8312 			case 1:
8313 				/* XXX compute */
8314 				queue_reset = true;
8315 				break;
8316 			case 2:
8317 				/* XXX compute */
8318 				queue_reset = true;
8319 				break;
8320 			}
8321 			break;
8322 		case 185: /* CP Privileged inst */
8323 			DRM_ERROR("Illegal instruction in command stream\n");
8324 			/* XXX check the bitfield order! */
8325 			me_id = (ring_id & 0x60) >> 5;
8326 			pipe_id = (ring_id & 0x18) >> 3;
8327 			queue_id = (ring_id & 0x7) >> 0;
8328 			switch (me_id) {
8329 			case 0:
8330 				/* This results in a full GPU reset, but all we need to do is soft
8331 				 * reset the CP for gfx
8332 				 */
8333 				queue_reset = true;
8334 				break;
8335 			case 1:
8336 				/* XXX compute */
8337 				queue_reset = true;
8338 				break;
8339 			case 2:
8340 				/* XXX compute */
8341 				queue_reset = true;
8342 				break;
8343 			}
8344 			break;
8345 		case 224: /* SDMA trap event */
8346 			/* XXX check the bitfield order! */
8347 			me_id = (ring_id & 0x3) >> 0;
8348 			queue_id = (ring_id & 0xc) >> 2;
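			/* me_id selects the SDMA instance (0 or 1) and queue_id
			 * the queue within it; only queue 0 carries fences here.
			 */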
8349 			DRM_DEBUG("IH: SDMA trap\n");
8350 			switch (me_id) {
8351 			case 0:
8352 				switch (queue_id) {
8353 				case 0:
8354 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8355 					break;
8356 				case 1:
8357 					/* XXX compute */
8358 					break;
8359 				case 2:
8360 					/* XXX compute */
8361 					break;
8362 				}
8363 				break;
8364 			case 1:
8365 				switch (queue_id) {
8366 				case 0:
8367 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8368 					break;
8369 				case 1:
8370 					/* XXX compute */
8371 					break;
8372 				case 2:
8373 					/* XXX compute */
8374 					break;
8375 				}
8376 				break;
8377 			}
8378 			break;
8379 		case 230: /* thermal low to high */
8380 			DRM_DEBUG("IH: thermal low to high\n");
8381 			rdev->pm.dpm.thermal.high_to_low = false;
8382 			queue_thermal = true;
8383 			break;
8384 		case 231: /* thermal high to low */
8385 			DRM_DEBUG("IH: thermal high to low\n");
8386 			rdev->pm.dpm.thermal.high_to_low = true;
8387 			queue_thermal = true;
8388 			break;
8389 		case 233: /* GUI IDLE */
8390 			DRM_DEBUG("IH: GUI idle\n");
8391 			break;
8392 		case 241: /* SDMA Privileged inst */
8393 		case 247: /* SDMA Privileged inst */
8394 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8395 			/* XXX check the bitfield order! */
8396 			me_id = (ring_id & 0x3) >> 0;
8397 			queue_id = (ring_id & 0xc) >> 2;
8398 			switch (me_id) {
8399 			case 0:
8400 				switch (queue_id) {
8401 				case 0:
8402 					queue_reset = true;
8403 					break;
8404 				case 1:
8405 					/* XXX compute */
8406 					queue_reset = true;
8407 					break;
8408 				case 2:
8409 					/* XXX compute */
8410 					queue_reset = true;
8411 					break;
8412 				}
8413 				break;
8414 			case 1:
8415 				switch (queue_id) {
8416 				case 0:
8417 					queue_reset = true;
8418 					break;
8419 				case 1:
8420 					/* XXX compute */
8421 					queue_reset = true;
8422 					break;
8423 				case 2:
8424 					/* XXX compute */
8425 					queue_reset = true;
8426 					break;
8427 				}
8428 				break;
8429 			}
8430 			break;
8431 		default:
8432 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8433 			break;
8434 		}
8435 
8436 		/* wptr/rptr are in bytes! */
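		/* each IH vector is 16 bytes: four dwords carrying src_id,
		 * src_data and ring_id (the fourth dword is unused here)
		 */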
8437 		rptr += 16;
8438 		rptr &= rdev->ih.ptr_mask;
8439 		WREG32(IH_RB_RPTR, rptr);
8440 	}
8441 	if (queue_dp)
8442 		schedule_work(&rdev->dp_work);
8443 	if (queue_hotplug)
8444 		schedule_delayed_work(&rdev->hotplug_work, 0);
8445 	if (queue_reset) {
8446 		rdev->needs_reset = true;
8447 		wake_up_all(&rdev->fence_queue);
8448 	}
8449 	if (queue_thermal)
8450 		schedule_work(&rdev->pm.dpm.thermal.work);
8451 	rdev->ih.rptr = rptr;
8452 	atomic_set(&rdev->ih.lock, 0);
8453 
8454 	/* make sure wptr hasn't changed while processing */
8455 	wptr = cik_get_ih_wptr(rdev);
8456 	if (wptr != rptr)
8457 		goto restart_ih;
8458 
8459 	return IRQ_HANDLED;
8460 }
8461 
8462 /*
8463  * startup/shutdown callbacks
8464  */
8465 /**
8466  * cik_startup - program the asic to a functional state
8467  *
8468  * @rdev: radeon_device pointer
8469  *
8470  * Programs the asic to a functional state (CIK).
8471  * Called by cik_init() and cik_resume().
8472  * Returns 0 for success, error for failure.
8473  */
8474 static int cik_startup(struct radeon_device *rdev)
8475 {
8476 	struct radeon_ring *ring;
8477 	u32 nop;
8478 	int r;
8479 
8480 	/* enable pcie gen2/3 link */
8481 	cik_pcie_gen3_enable(rdev);
8482 	/* enable aspm */
8483 	cik_program_aspm(rdev);
8484 
8485 	/* scratch needs to be initialized before MC */
8486 	r = r600_vram_scratch_init(rdev);
8487 	if (r)
8488 		return r;
8489 
8490 	cik_mc_program(rdev);
8491 
8492 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8493 		r = ci_mc_load_microcode(rdev);
8494 		if (r) {
8495 			DRM_ERROR("Failed to load MC firmware!\n");
8496 			return r;
8497 		}
8498 	}
8499 
8500 	r = cik_pcie_gart_enable(rdev);
8501 	if (r)
8502 		return r;
8503 	cik_gpu_init(rdev);
8504 
8505 	/* allocate rlc buffers */
8506 	if (rdev->flags & RADEON_IS_IGP) {
8507 		if (rdev->family == CHIP_KAVERI) {
8508 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8509 			rdev->rlc.reg_list_size =
8510 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8511 		} else {
8512 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8513 			rdev->rlc.reg_list_size =
8514 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8515 		}
8516 	}
8517 	rdev->rlc.cs_data = ci_cs_data;
8518 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8519 	r = sumo_rlc_init(rdev);
8520 	if (r) {
8521 		DRM_ERROR("Failed to init rlc BOs!\n");
8522 		return r;
8523 	}
8524 
8525 	/* allocate wb buffer */
8526 	r = radeon_wb_init(rdev);
8527 	if (r)
8528 		return r;
8529 
8530 	/* allocate mec buffers */
8531 	r = cik_mec_init(rdev);
8532 	if (r) {
8533 		DRM_ERROR("Failed to init MEC BOs!\n");
8534 		return r;
8535 	}
8536 
8537 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8538 	if (r) {
8539 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8540 		return r;
8541 	}
8542 
8543 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8544 	if (r) {
8545 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8546 		return r;
8547 	}
8548 
8549 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8550 	if (r) {
8551 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8552 		return r;
8553 	}
8554 
8555 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8556 	if (r) {
8557 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8558 		return r;
8559 	}
8560 
8561 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8562 	if (r) {
8563 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8564 		return r;
8565 	}
8566 
8567 	r = radeon_uvd_resume(rdev);
8568 	if (!r) {
8569 		r = uvd_v4_2_resume(rdev);
8570 		if (!r) {
8571 			r = radeon_fence_driver_start_ring(rdev,
8572 							   R600_RING_TYPE_UVD_INDEX);
8573 			if (r)
8574 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8575 		}
8576 	}
8577 	if (r)
8578 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8579 
8580 	r = radeon_vce_resume(rdev);
8581 	if (!r) {
8582 		r = vce_v2_0_resume(rdev);
8583 		if (!r)
8584 			r = radeon_fence_driver_start_ring(rdev,
8585 							   TN_RING_TYPE_VCE1_INDEX);
8586 		if (!r)
8587 			r = radeon_fence_driver_start_ring(rdev,
8588 							   TN_RING_TYPE_VCE2_INDEX);
8589 	}
8590 	if (r) {
8591 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8592 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8593 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8594 	}
8595 
8596 	/* Enable IRQ */
8597 	if (!rdev->irq.installed) {
8598 		r = radeon_irq_kms_init(rdev);
8599 		if (r)
8600 			return r;
8601 	}
8602 
8603 	r = cik_irq_init(rdev);
8604 	if (r) {
8605 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8606 		radeon_irq_kms_fini(rdev);
8607 		return r;
8608 	}
8609 	cik_irq_set(rdev);
8610 
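	/* older hawaii CP ucode only copes with type-2 (CP_PACKET2) nop
	 * padding on the gfx ring; the updated ucode (rdev->new_fw) accepts
	 * the type-3 nop used on the other CIK parts
	 */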
8611 	if (rdev->family == CHIP_HAWAII) {
8612 		if (rdev->new_fw)
8613 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8614 		else
8615 			nop = RADEON_CP_PACKET2;
8616 	} else {
8617 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8618 	}
8619 
8620 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8621 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8622 			     nop);
8623 	if (r)
8624 		return r;
8625 
8626 	/* set up the compute queues */
8627 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8628 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8629 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8630 			     nop);
8631 	if (r)
8632 		return r;
8633 	ring->me = 1; /* first MEC */
8634 	ring->pipe = 0; /* first pipe */
8635 	ring->queue = 0; /* first queue */
8636 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8637 
8638 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8639 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8640 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8641 			     nop);
8642 	if (r)
8643 		return r;
8644 	/* dGPUs have only 1 MEC */
8645 	ring->me = 1; /* first MEC */
8646 	ring->pipe = 0; /* first pipe */
8647 	ring->queue = 1; /* second queue */
8648 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8649 
8650 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8651 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8652 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8653 	if (r)
8654 		return r;
8655 
8656 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8657 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8658 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8659 	if (r)
8660 		return r;
8661 
8662 	r = cik_cp_resume(rdev);
8663 	if (r)
8664 		return r;
8665 
8666 	r = cik_sdma_resume(rdev);
8667 	if (r)
8668 		return r;
8669 
8670 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8671 	if (ring->ring_size) {
8672 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8673 				     RADEON_CP_PACKET2);
8674 		if (!r)
8675 			r = uvd_v1_0_init(rdev);
8676 		if (r)
8677 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8678 	}
8679 
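	/* -ENOENT is used as a sentinel: if neither VCE ring was set up,
	 * r stays -ENOENT, vce_v1_0_init() is skipped and no error is
	 * logged below
	 */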
8680 	r = -ENOENT;
8681 
8682 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8683 	if (ring->ring_size)
8684 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8685 				     VCE_CMD_NO_OP);
8686 
8687 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8688 	if (ring->ring_size)
8689 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8690 				     VCE_CMD_NO_OP);
8691 
8692 	if (!r)
8693 		r = vce_v1_0_init(rdev);
8694 	else if (r != -ENOENT)
8695 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8696 
8697 	r = radeon_ib_pool_init(rdev);
8698 	if (r) {
8699 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8700 		return r;
8701 	}
8702 
8703 	r = radeon_vm_manager_init(rdev);
8704 	if (r) {
8705 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8706 		return r;
8707 	}
8708 
8709 	r = radeon_audio_init(rdev);
8710 	if (r)
8711 		return r;
8712 
8713 	r = radeon_kfd_resume(rdev);
8714 	if (r)
8715 		return r;
8716 
8717 	return 0;
8718 }
8719 
8720 /**
8721  * cik_resume - resume the asic to a functional state
8722  *
8723  * @rdev: radeon_device pointer
8724  *
8725  * Programs the asic to a functional state (CIK).
8726  * Called at resume.
8727  * Returns 0 for success, error for failure.
8728  */
8729 int cik_resume(struct radeon_device *rdev)
8730 {
8731 	int r;
8732 
8733 	/* post card */
8734 	atom_asic_init(rdev->mode_info.atom_context);
8735 
8736 	/* init golden registers */
8737 	cik_init_golden_registers(rdev);
8738 
8739 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8740 		radeon_pm_resume(rdev);
8741 
8742 	rdev->accel_working = true;
8743 	r = cik_startup(rdev);
8744 	if (r) {
8745 		DRM_ERROR("cik startup failed on resume\n");
8746 		rdev->accel_working = false;
8747 		return r;
8748 	}
8749 
8750 	return r;
8751 
8752 }
8753 
8754 /**
8755  * cik_suspend - suspend the asic
8756  *
8757  * @rdev: radeon_device pointer
8758  *
8759  * Bring the chip into a state suitable for suspend (CIK).
8760  * Called at suspend.
8761  * Returns 0 for success.
8762  */
8763 int cik_suspend(struct radeon_device *rdev)
8764 {
8765 	radeon_kfd_suspend(rdev);
8766 	radeon_pm_suspend(rdev);
8767 	radeon_audio_fini(rdev);
8768 	radeon_vm_manager_fini(rdev);
8769 	cik_cp_enable(rdev, false);
8770 	cik_sdma_enable(rdev, false);
8771 	uvd_v1_0_fini(rdev);
8772 	radeon_uvd_suspend(rdev);
8773 	radeon_vce_suspend(rdev);
8774 	cik_fini_pg(rdev);
8775 	cik_fini_cg(rdev);
8776 	cik_irq_suspend(rdev);
8777 	radeon_wb_disable(rdev);
8778 	cik_pcie_gart_disable(rdev);
8779 	return 0;
8780 }
8781 
8782 /* The plan is to move initialization into this function and to use
8783  * helper functions so that radeon_device_init does little more
8784  * than call asic-specific functions. This should also allow us
8785  * to remove a bunch of callback functions,
8786  * such as vram_info.
8787  */
8788 /**
8789  * cik_init - asic specific driver and hw init
8790  *
8791  * @rdev: radeon_device pointer
8792  *
8793  * Setup asic specific driver variables and program the hw
8794  * to a functional state (CIK).
8795  * Called at driver startup.
8796  * Returns 0 for success, errors for failure.
8797  */
8798 int cik_init(struct radeon_device *rdev)
8799 {
8800 	struct radeon_ring *ring;
8801 	int r;
8802 
8803 	/* Read BIOS */
8804 	if (!radeon_get_bios(rdev)) {
8805 		if (ASIC_IS_AVIVO(rdev))
8806 			return -EINVAL;
8807 	}
8808 	/* Must be an ATOMBIOS */
8809 	if (!rdev->is_atom_bios) {
8810 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8811 		return -EINVAL;
8812 	}
8813 	r = radeon_atombios_init(rdev);
8814 	if (r)
8815 		return r;
8816 
8817 	/* Post card if necessary */
8818 	if (!radeon_card_posted(rdev)) {
8819 		if (!rdev->bios) {
8820 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8821 			return -EINVAL;
8822 		}
8823 		DRM_INFO("GPU not posted. posting now...\n");
8824 		atom_asic_init(rdev->mode_info.atom_context);
8825 	}
8826 	/* init golden registers */
8827 	cik_init_golden_registers(rdev);
8828 	/* Initialize scratch registers */
8829 	cik_scratch_init(rdev);
8830 	/* Initialize surface registers */
8831 	radeon_surface_init(rdev);
8832 	/* Initialize clocks */
8833 	radeon_get_clock_info(rdev->ddev);
8834 
8835 	/* Fence driver */
8836 	r = radeon_fence_driver_init(rdev);
8837 	if (r)
8838 		return r;
8839 
8840 	/* initialize memory controller */
8841 	r = cik_mc_init(rdev);
8842 	if (r)
8843 		return r;
8844 	/* Memory manager */
8845 	r = radeon_bo_init(rdev);
8846 	if (r)
8847 		return r;
8848 
8849 	if (rdev->flags & RADEON_IS_IGP) {
8850 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8851 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8852 			r = cik_init_microcode(rdev);
8853 			if (r) {
8854 				DRM_ERROR("Failed to load firmware!\n");
8855 				return r;
8856 			}
8857 		}
8858 	} else {
8859 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8860 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8861 		    !rdev->mc_fw) {
8862 			r = cik_init_microcode(rdev);
8863 			if (r) {
8864 				DRM_ERROR("Failed to load firmware!\n");
8865 				return r;
8866 			}
8867 		}
8868 	}
8869 
8870 	/* Initialize power management */
8871 	radeon_pm_init(rdev);
8872 
8873 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8874 	ring->ring_obj = NULL;
8875 	r600_ring_init(rdev, ring, 1024 * 1024);
8876 
8877 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8878 	ring->ring_obj = NULL;
8879 	r600_ring_init(rdev, ring, 1024 * 1024);
8880 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8881 	if (r)
8882 		return r;
8883 
8884 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8885 	ring->ring_obj = NULL;
8886 	r600_ring_init(rdev, ring, 1024 * 1024);
8887 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8888 	if (r)
8889 		return r;
8890 
8891 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8892 	ring->ring_obj = NULL;
8893 	r600_ring_init(rdev, ring, 256 * 1024);
8894 
8895 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8896 	ring->ring_obj = NULL;
8897 	r600_ring_init(rdev, ring, 256 * 1024);
8898 
8899 	r = radeon_uvd_init(rdev);
8900 	if (!r) {
8901 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8902 		ring->ring_obj = NULL;
8903 		r600_ring_init(rdev, ring, 4096);
8904 	}
8905 
8906 	r = radeon_vce_init(rdev);
8907 	if (!r) {
8908 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8909 		ring->ring_obj = NULL;
8910 		r600_ring_init(rdev, ring, 4096);
8911 
8912 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8913 		ring->ring_obj = NULL;
8914 		r600_ring_init(rdev, ring, 4096);
8915 	}
8916 
8917 	rdev->ih.ring_obj = NULL;
8918 	r600_ih_ring_init(rdev, 64 * 1024);
8919 
8920 	r = r600_pcie_gart_init(rdev);
8921 	if (r)
8922 		return r;
8923 
8924 	rdev->accel_working = true;
8925 	r = cik_startup(rdev);
8926 	if (r) {
8927 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8928 		cik_cp_fini(rdev);
8929 		cik_sdma_fini(rdev);
8930 		cik_irq_fini(rdev);
8931 		sumo_rlc_fini(rdev);
8932 		cik_mec_fini(rdev);
8933 		radeon_wb_fini(rdev);
8934 		radeon_ib_pool_fini(rdev);
8935 		radeon_vm_manager_fini(rdev);
8936 		radeon_irq_kms_fini(rdev);
8937 		cik_pcie_gart_fini(rdev);
8938 		rdev->accel_working = false;
8939 	}
8940 
8941 	/* Don't start up if the MC ucode is missing.
8942 	 * The default clocks and voltages before the MC ucode
8943 	 * is loaded are not sufficient for advanced operations.
8944 	 */
8945 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8946 		DRM_ERROR("radeon: MC ucode required for CIK+.\n");
8947 		return -EINVAL;
8948 	}
8949 
8950 	return 0;
8951 }
8952 
8953 /**
8954  * cik_fini - asic specific driver and hw fini
8955  *
8956  * @rdev: radeon_device pointer
8957  *
8958  * Tear down the asic specific driver variables and program the hw
8959  * to an idle state (CIK).
8960  * Called at driver unload.
8961  */
8962 void cik_fini(struct radeon_device *rdev)
8963 {
8964 	radeon_pm_fini(rdev);
8965 	cik_cp_fini(rdev);
8966 	cik_sdma_fini(rdev);
8967 	cik_fini_pg(rdev);
8968 	cik_fini_cg(rdev);
8969 	cik_irq_fini(rdev);
8970 	sumo_rlc_fini(rdev);
8971 	cik_mec_fini(rdev);
8972 	radeon_wb_fini(rdev);
8973 	radeon_vm_manager_fini(rdev);
8974 	radeon_ib_pool_fini(rdev);
8975 	radeon_irq_kms_fini(rdev);
8976 	uvd_v1_0_fini(rdev);
8977 	radeon_uvd_fini(rdev);
8978 	radeon_vce_fini(rdev);
8979 	cik_pcie_gart_fini(rdev);
8980 	r600_vram_scratch_fini(rdev);
8981 	radeon_gem_fini(rdev);
8982 	radeon_fence_driver_fini(rdev);
8983 	radeon_bo_fini(rdev);
8984 	radeon_atombios_fini(rdev);
8985 	kfree(rdev->bios);
8986 	rdev->bios = NULL;
8987 }
8988 
8989 void dce8_program_fmt(struct drm_encoder *encoder)
8990 {
8991 	struct drm_device *dev = encoder->dev;
8992 	struct radeon_device *rdev = dev->dev_private;
8993 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8994 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8995 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8996 	int bpc = 0;
8997 	u32 tmp = 0;
8998 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8999 
9000 	if (connector) {
9001 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
9002 		bpc = radeon_get_monitor_bpc(connector);
9003 		dither = radeon_connector->dither;
9004 	}
9005 
9006 	/* LVDS/eDP FMT is set up by atom */
9007 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
9008 		return;
9009 
9010 	/* not needed for analog */
9011 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
9012 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
9013 		return;
9014 
9015 	if (bpc == 0)
9016 		return;
9017 
9018 	switch (bpc) {
9019 	case 6:
9020 		if (dither == RADEON_FMT_DITHER_ENABLE)
9021 			/* XXX sort out optimal dither settings */
9022 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9023 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
9024 		else
9025 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
9026 		break;
9027 	case 8:
9028 		if (dither == RADEON_FMT_DITHER_ENABLE)
9029 			/* XXX sort out optimal dither settings */
9030 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9031 				FMT_RGB_RANDOM_ENABLE |
9032 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
9033 		else
9034 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
9035 		break;
9036 	case 10:
9037 		if (dither == RADEON_FMT_DITHER_ENABLE)
9038 			/* XXX sort out optimal dither settings */
9039 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9040 				FMT_RGB_RANDOM_ENABLE |
9041 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
9042 		else
9043 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
9044 		break;
9045 	default:
9046 		/* not needed */
9047 		break;
9048 	}
9049 
9050 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9051 }
9052 
9053 /* display watermark setup */
9054 /**
9055  * dce8_line_buffer_adjust - Set up the line buffer
9056  *
9057  * @rdev: radeon_device pointer
9058  * @radeon_crtc: the selected display controller
9059  * @mode: the current display mode on the selected display
9060  * controller
9061  *
9062  * Set up the line buffer allocation for
9063  * the selected display controller (CIK).
9064  * Returns the line buffer size in pixels.
9065  */
9066 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9067 				   struct radeon_crtc *radeon_crtc,
9068 				   struct drm_display_mode *mode)
9069 {
9070 	u32 tmp, buffer_alloc, i;
9071 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9072 	/*
9073 	 * Line Buffer Setup
9074 	 * There are 6 line buffers, one for each display controller.
9075 	 * There are 3 partitions per LB. Select the number of partitions
9076 	 * to enable based on the display width.  For display widths larger
9077 	 * than 4096, you need to use 2 display controllers and combine
9078 	 * them using the stereo blender.
9079 	 */
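	/* For example, a 1920-pixel-wide mode lands in the second bucket
	 * below (LB_MEMORY_CONFIG(2)) and the function reports an lb_size
	 * of 2560 * 2 pixels to the watermark code.
	 */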
9080 	if (radeon_crtc->base.enabled && mode) {
9081 		if (mode->crtc_hdisplay < 1920) {
9082 			tmp = 1;
9083 			buffer_alloc = 2;
9084 		} else if (mode->crtc_hdisplay < 2560) {
9085 			tmp = 2;
9086 			buffer_alloc = 2;
9087 		} else if (mode->crtc_hdisplay < 4096) {
9088 			tmp = 0;
9089 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9090 		} else {
9091 			DRM_DEBUG_KMS("Mode too big for LB!\n");
9092 			tmp = 0;
9093 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9094 		}
9095 	} else {
9096 		tmp = 1;
9097 		buffer_alloc = 0;
9098 	}
9099 
9100 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9101 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9102 
9103 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9104 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9105 	for (i = 0; i < rdev->usec_timeout; i++) {
9106 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9107 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
9108 			break;
9109 		udelay(1);
9110 	}
9111 
9112 	if (radeon_crtc->base.enabled && mode) {
9113 		switch (tmp) {
9114 		case 0:
9115 		default:
9116 			return 4096 * 2;
9117 		case 1:
9118 			return 1920 * 2;
9119 		case 2:
9120 			return 2560 * 2;
9121 		}
9122 	}
9123 
9124 	/* controller not enabled, so no lb used */
9125 	return 0;
9126 }
9127 
9128 /**
9129  * cik_get_number_of_dram_channels - get the number of dram channels
9130  *
9131  * @rdev: radeon_device pointer
9132  *
9133  * Look up the number of video ram channels (CIK).
9134  * Used for display watermark bandwidth calculations
9135  * Returns the number of dram channels
9136  */
9137 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9138 {
9139 	u32 tmp = RREG32(MC_SHARED_CHMAP);
9140 
9141 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9142 	case 0:
9143 	default:
9144 		return 1;
9145 	case 1:
9146 		return 2;
9147 	case 2:
9148 		return 4;
9149 	case 3:
9150 		return 8;
9151 	case 4:
9152 		return 3;
9153 	case 5:
9154 		return 6;
9155 	case 6:
9156 		return 10;
9157 	case 7:
9158 		return 12;
9159 	case 8:
9160 		return 16;
9161 	}
9162 }
9163 
9164 struct dce8_wm_params {
9165 	u32 dram_channels; /* number of dram channels */
9166 	u32 yclk;          /* bandwidth per dram data pin in kHz */
9167 	u32 sclk;          /* engine clock in kHz */
9168 	u32 disp_clk;      /* display clock in kHz */
9169 	u32 src_width;     /* viewport width */
9170 	u32 active_time;   /* active display time in ns */
9171 	u32 blank_time;    /* blank time in ns */
9172 	bool interlaced;    /* mode is interlaced */
9173 	fixed20_12 vsc;    /* vertical scale ratio */
9174 	u32 num_heads;     /* number of active crtcs */
9175 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
9176 	u32 lb_size;       /* line buffer allocated to pipe */
9177 	u32 vtaps;         /* vertical scaler taps */
9178 };
9179 
9180 /**
9181  * dce8_dram_bandwidth - get the dram bandwidth
9182  *
9183  * @wm: watermark calculation data
9184  *
9185  * Calculate the raw dram bandwidth (CIK).
9186  * Used for display watermark bandwidth calculations
9187  * Returns the dram bandwidth in MBytes/s
9188  */
9189 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9190 {
9191 	/* Calculate raw DRAM Bandwidth */
9192 	fixed20_12 dram_efficiency; /* 0.7 */
9193 	fixed20_12 yclk, dram_channels, bandwidth;
9194 	fixed20_12 a;
9195 
9196 	a.full = dfixed_const(1000);
9197 	yclk.full = dfixed_const(wm->yclk);
9198 	yclk.full = dfixed_div(yclk, a);
9199 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9200 	a.full = dfixed_const(10);
9201 	dram_efficiency.full = dfixed_const(7);
9202 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9203 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9204 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
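	/* worked example with illustrative numbers: 8 channels at
	 * yclk = 5000000 kHz gives (5000000 / 1000) * (8 * 4) * 0.7
	 * = 112000 MB/s
	 */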
9205 
9206 	return dfixed_trunc(bandwidth);
9207 }
9208 
9209 /**
9210  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9211  *
9212  * @wm: watermark calculation data
9213  *
9214  * Calculate the dram bandwidth used for display (CIK).
9215  * Used for display watermark bandwidth calculations
9216  * Returns the dram bandwidth for display in MBytes/s
9217  */
9218 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9219 {
9220 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9221 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9222 	fixed20_12 yclk, dram_channels, bandwidth;
9223 	fixed20_12 a;
9224 
9225 	a.full = dfixed_const(1000);
9226 	yclk.full = dfixed_const(wm->yclk);
9227 	yclk.full = dfixed_div(yclk, a);
9228 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9229 	a.full = dfixed_const(10);
9230 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9231 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9232 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9233 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9234 
9235 	return dfixed_trunc(bandwidth);
9236 }
9237 
9238 /**
9239  * dce8_data_return_bandwidth - get the data return bandwidth
9240  *
9241  * @wm: watermark calculation data
9242  *
9243  * Calculate the data return bandwidth used for display (CIK).
9244  * Used for display watermark bandwidth calculations
9245  * Returns the data return bandwidth in MBytes/s
9246  */
9247 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9248 {
9249 	/* Calculate the display Data return Bandwidth */
9250 	fixed20_12 return_efficiency; /* 0.8 */
9251 	fixed20_12 sclk, bandwidth;
9252 	fixed20_12 a;
9253 
9254 	a.full = dfixed_const(1000);
9255 	sclk.full = dfixed_const(wm->sclk);
9256 	sclk.full = dfixed_div(sclk, a);
9257 	a.full = dfixed_const(10);
9258 	return_efficiency.full = dfixed_const(8);
9259 	return_efficiency.full = dfixed_div(return_efficiency, a);
9260 	a.full = dfixed_const(32);
9261 	bandwidth.full = dfixed_mul(a, sclk);
9262 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9263 
9264 	return dfixed_trunc(bandwidth);
9265 }
9266 
9267 /**
9268  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9269  *
9270  * @wm: watermark calculation data
9271  *
9272  * Calculate the dmif bandwidth used for display (CIK).
9273  * Used for display watermark bandwidth calculations
9274  * Returns the dmif bandwidth in MBytes/s
9275  */
9276 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9277 {
9278 	/* Calculate the DMIF Request Bandwidth */
9279 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9280 	fixed20_12 disp_clk, bandwidth;
9281 	fixed20_12 a, b;
9282 
9283 	a.full = dfixed_const(1000);
9284 	disp_clk.full = dfixed_const(wm->disp_clk);
9285 	disp_clk.full = dfixed_div(disp_clk, a);
9286 	a.full = dfixed_const(32);
9287 	b.full = dfixed_mul(a, disp_clk);
9288 
9289 	a.full = dfixed_const(10);
9290 	disp_clk_request_efficiency.full = dfixed_const(8);
9291 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9292 
9293 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9294 
9295 	return dfixed_trunc(bandwidth);
9296 }
9297 
9298 /**
9299  * dce8_available_bandwidth - get the min available bandwidth
9300  *
9301  * @wm: watermark calculation data
9302  *
9303  * Calculate the min available bandwidth used for display (CIK).
9304  * Used for display watermark bandwidth calculations
9305  * Returns the min available bandwidth in MBytes/s
9306  */
9307 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9308 {
9309 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9310 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9311 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9312 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9313 
9314 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9315 }
9316 
9317 /**
9318  * dce8_average_bandwidth - get the average available bandwidth
9319  *
9320  * @wm: watermark calculation data
9321  *
9322  * Calculate the average available bandwidth used for display (CIK).
9323  * Used for display watermark bandwidth calculations
9324  * Returns the average available bandwidth in MBytes/s
9325  */
9326 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9327 {
9328 	/* Calculate the display mode Average Bandwidth
9329 	 * DisplayMode should contain the source and destination dimensions,
9330 	 * timing, etc.
9331 	 */
9332 	fixed20_12 bpp;
9333 	fixed20_12 line_time;
9334 	fixed20_12 src_width;
9335 	fixed20_12 bandwidth;
9336 	fixed20_12 a;
9337 
9338 	a.full = dfixed_const(1000);
9339 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9340 	line_time.full = dfixed_div(line_time, a);
9341 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9342 	src_width.full = dfixed_const(wm->src_width);
9343 	bandwidth.full = dfixed_mul(src_width, bpp);
9344 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9345 	bandwidth.full = dfixed_div(bandwidth, line_time);
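	/* illustrative example: a 1920 pixel source at 4 bytes/pixel with
	 * vsc = 1 over a 13.2 us line time averages about 582 MB/s
	 */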
9346 
9347 	return dfixed_trunc(bandwidth);
9348 }
9349 
9350 /**
9351  * dce8_latency_watermark - get the latency watermark
9352  *
9353  * @wm: watermark calculation data
9354  *
9355  * Calculate the latency watermark (CIK).
9356  * Used for display watermark bandwidth calculations
9357  * Returns the latency watermark in ns
9358  */
9359 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9360 {
9361 	/* First calculate the latency in ns */
9362 	u32 mc_latency = 2000; /* 2000 ns. */
9363 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9364 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9365 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9366 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9367 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9368 		(wm->num_heads * cursor_line_pair_return_time);
9369 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9370 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9371 	u32 tmp, dmif_size = 12288;
9372 	fixed20_12 a, b, c;
9373 
9374 	if (wm->num_heads == 0)
9375 		return 0;
9376 
9377 	a.full = dfixed_const(2);
9378 	b.full = dfixed_const(1);
9379 	if ((wm->vsc.full > a.full) ||
9380 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9381 	    (wm->vtaps >= 5) ||
9382 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9383 		max_src_lines_per_dst_line = 4;
9384 	else
9385 		max_src_lines_per_dst_line = 2;
9386 
9387 	a.full = dfixed_const(available_bandwidth);
9388 	b.full = dfixed_const(wm->num_heads);
9389 	a.full = dfixed_div(a, b);
9390 
9391 	b.full = dfixed_const(mc_latency + 512);
9392 	c.full = dfixed_const(wm->disp_clk);
9393 	b.full = dfixed_div(b, c);
9394 
9395 	c.full = dfixed_const(dmif_size);
9396 	b.full = dfixed_div(c, b);
9397 
9398 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9399 
9400 	b.full = dfixed_const(1000);
9401 	c.full = dfixed_const(wm->disp_clk);
9402 	b.full = dfixed_div(c, b);
9403 	c.full = dfixed_const(wm->bytes_per_pixel);
9404 	b.full = dfixed_mul(b, c);
9405 
9406 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9407 
9408 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9409 	b.full = dfixed_const(1000);
9410 	c.full = dfixed_const(lb_fill_bw);
9411 	b.full = dfixed_div(c, b);
9412 	a.full = dfixed_div(a, b);
9413 	line_fill_time = dfixed_trunc(a);
9414 
9415 	if (line_fill_time < wm->active_time)
9416 		return latency;
9417 	else
9418 		return latency + (line_fill_time - wm->active_time);
9419 
9420 }
9421 
9422 /**
9423  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9424  * average and available dram bandwidth
9425  *
9426  * @wm: watermark calculation data
9427  *
9428  * Check if the display average bandwidth fits in the display
9429  * dram bandwidth (CIK).
9430  * Used for display watermark bandwidth calculations
9431  * Returns true if the display fits, false if not.
9432  */
9433 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9434 {
9435 	if (dce8_average_bandwidth(wm) <=
9436 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9437 		return true;
9438 	else
9439 		return false;
9440 }
9441 
9442 /**
9443  * dce8_average_bandwidth_vs_available_bandwidth - check
9444  * average and available bandwidth
9445  *
9446  * @wm: watermark calculation data
9447  *
9448  * Check if the display average bandwidth fits in the display
9449  * available bandwidth (CIK).
9450  * Used for display watermark bandwidth calculations
9451  * Returns true if the display fits, false if not.
9452  */
9453 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9454 {
9455 	if (dce8_average_bandwidth(wm) <=
9456 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9457 		return true;
9458 	else
9459 		return false;
9460 }
9461 
9462 /**
9463  * dce8_check_latency_hiding - check latency hiding
9464  *
9465  * @wm: watermark calculation data
9466  *
9467  * Check latency hiding (CIK).
9468  * Used for display watermark bandwidth calculations
9469  * Returns true if the display fits, false if not.
9470  */
9471 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9472 {
9473 	u32 lb_partitions = wm->lb_size / wm->src_width;
9474 	u32 line_time = wm->active_time + wm->blank_time;
9475 	u32 latency_tolerant_lines;
9476 	u32 latency_hiding;
9477 	fixed20_12 a;
9478 
9479 	a.full = dfixed_const(1);
9480 	if (wm->vsc.full > a.full)
9481 		latency_tolerant_lines = 1;
9482 	else {
9483 		if (lb_partitions <= (wm->vtaps + 1))
9484 			latency_tolerant_lines = 1;
9485 		else
9486 			latency_tolerant_lines = 2;
9487 	}
9488 
9489 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9490 
9491 	if (dce8_latency_watermark(wm) <= latency_hiding)
9492 		return true;
9493 	else
9494 		return false;
9495 }
9496 
9497 /**
9498  * dce8_program_watermarks - program display watermarks
9499  *
9500  * @rdev: radeon_device pointer
9501  * @radeon_crtc: the selected display controller
9502  * @lb_size: line buffer size
9503  * @num_heads: number of display controllers in use
9504  *
9505  * Calculate and program the display watermarks for the
9506  * selected display controller (CIK).
9507  */
9508 static void dce8_program_watermarks(struct radeon_device *rdev,
9509 				    struct radeon_crtc *radeon_crtc,
9510 				    u32 lb_size, u32 num_heads)
9511 {
9512 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9513 	struct dce8_wm_params wm_low, wm_high;
9514 	u32 pixel_period;
9515 	u32 line_time = 0;
9516 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9517 	u32 tmp, wm_mask;
9518 
9519 	if (radeon_crtc->base.enabled && num_heads && mode) {
9520 		pixel_period = 1000000 / (u32)mode->clock;
9521 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
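		/* e.g. a 148500 kHz mode with crtc_htotal 2200: pixel_period =
		 * 1000000 / 148500 = 6 ns, line_time = 2200 * 6 = 13200 ns
		 */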
9522 
9523 		/* watermark for high clocks */
9524 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9525 		    rdev->pm.dpm_enabled) {
9526 			wm_high.yclk =
9527 				radeon_dpm_get_mclk(rdev, false) * 10;
9528 			wm_high.sclk =
9529 				radeon_dpm_get_sclk(rdev, false) * 10;
9530 		} else {
9531 			wm_high.yclk = rdev->pm.current_mclk * 10;
9532 			wm_high.sclk = rdev->pm.current_sclk * 10;
9533 		}
9534 
9535 		wm_high.disp_clk = mode->clock;
9536 		wm_high.src_width = mode->crtc_hdisplay;
9537 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9538 		wm_high.blank_time = line_time - wm_high.active_time;
9539 		wm_high.interlaced = false;
9540 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9541 			wm_high.interlaced = true;
9542 		wm_high.vsc = radeon_crtc->vsc;
9543 		wm_high.vtaps = 1;
9544 		if (radeon_crtc->rmx_type != RMX_OFF)
9545 			wm_high.vtaps = 2;
9546 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9547 		wm_high.lb_size = lb_size;
9548 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9549 		wm_high.num_heads = num_heads;
9550 
9551 		/* set for high clocks */
9552 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9553 
9554 		/* possibly force display priority to high */
9555 		/* should really do this at mode validation time... */
9556 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9557 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9558 		    !dce8_check_latency_hiding(&wm_high) ||
9559 		    (rdev->disp_priority == 2)) {
9560 			DRM_DEBUG_KMS("force priority to high\n");
9561 		}
9562 
9563 		/* watermark for low clocks */
9564 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9565 		    rdev->pm.dpm_enabled) {
9566 			wm_low.yclk =
9567 				radeon_dpm_get_mclk(rdev, true) * 10;
9568 			wm_low.sclk =
9569 				radeon_dpm_get_sclk(rdev, true) * 10;
9570 		} else {
9571 			wm_low.yclk = rdev->pm.current_mclk * 10;
9572 			wm_low.sclk = rdev->pm.current_sclk * 10;
9573 		}
9574 
9575 		wm_low.disp_clk = mode->clock;
9576 		wm_low.src_width = mode->crtc_hdisplay;
9577 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9578 		wm_low.blank_time = line_time - wm_low.active_time;
9579 		wm_low.interlaced = false;
9580 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9581 			wm_low.interlaced = true;
9582 		wm_low.vsc = radeon_crtc->vsc;
9583 		wm_low.vtaps = 1;
9584 		if (radeon_crtc->rmx_type != RMX_OFF)
9585 			wm_low.vtaps = 2;
9586 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9587 		wm_low.lb_size = lb_size;
9588 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9589 		wm_low.num_heads = num_heads;
9590 
9591 		/* set for low clocks */
9592 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9593 
9594 		/* possibly force display priority to high */
9595 		/* should really do this at mode validation time... */
9596 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9597 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9598 		    !dce8_check_latency_hiding(&wm_low) ||
9599 		    (rdev->disp_priority == 2)) {
9600 			DRM_DEBUG_KMS("force priority to high\n");
9601 		}
9602 
9603 		/* Save number of lines the linebuffer leads before the scanout */
9604 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9605 	}
9606 
9607 	/* select wm A */
9608 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9609 	tmp = wm_mask;
9610 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9611 	tmp |= LATENCY_WATERMARK_MASK(1);
9612 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9613 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9614 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9615 		LATENCY_HIGH_WATERMARK(line_time)));
9616 	/* select wm B */
9617 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9618 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9619 	tmp |= LATENCY_WATERMARK_MASK(2);
9620 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9621 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9622 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9623 		LATENCY_HIGH_WATERMARK(line_time)));
9624 	/* restore original selection */
9625 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9626 
9627 	/* save values for DPM */
9628 	radeon_crtc->line_time = line_time;
9629 	radeon_crtc->wm_high = latency_watermark_a;
9630 	radeon_crtc->wm_low = latency_watermark_b;
9631 }
9632 
9633 /**
9634  * dce8_bandwidth_update - program display watermarks
9635  *
9636  * @rdev: radeon_device pointer
9637  *
9638  * Calculate and program the display watermarks and line
9639  * buffer allocation (CIK).
9640  */
9641 void dce8_bandwidth_update(struct radeon_device *rdev)
9642 {
9643 	struct drm_display_mode *mode = NULL;
9644 	u32 num_heads = 0, lb_size;
9645 	int i;
9646 
9647 	if (!rdev->mode_info.mode_config_initialized)
9648 		return;
9649 
9650 	radeon_update_display_priority(rdev);
9651 
9652 	for (i = 0; i < rdev->num_crtc; i++) {
9653 		if (rdev->mode_info.crtcs[i]->base.enabled)
9654 			num_heads++;
9655 	}
9656 	for (i = 0; i < rdev->num_crtc; i++) {
9657 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9658 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9659 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9660 	}
9661 }
9662 
9663 /**
9664  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9665  *
9666  * @rdev: radeon_device pointer
9667  *
9668  * Fetches a GPU clock counter snapshot (CIK).
9669  * Returns the 64 bit clock counter snapshot.
9670  */
9671 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9672 {
9673 	uint64_t clock;
9674 
9675 	mutex_lock(&rdev->gpu_clock_mutex);
9676 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
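	/* the capture write above latches the full 64-bit counter, so the
	 * two 32-bit reads below see one consistent snapshot; the mutex
	 * keeps concurrent callers from clobbering the latched value
	 */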
9677 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9678 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9679 	mutex_unlock(&rdev->gpu_clock_mutex);
9680 	return clock;
9681 }
9682 
9683 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9684                               u32 cntl_reg, u32 status_reg)
9685 {
9686 	int r, i;
9687 	struct atom_clock_dividers dividers;
9688 	uint32_t tmp;
9689 
9690 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9691 					   clock, false, &dividers);
9692 	if (r)
9693 		return r;
9694 
9695 	tmp = RREG32_SMC(cntl_reg);
9696 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9697 	tmp |= dividers.post_divider;
9698 	WREG32_SMC(cntl_reg, tmp);
9699 
9700 	for (i = 0; i < 100; i++) {
9701 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9702 			break;
9703 		mdelay(10);
9704 	}
9705 	if (i == 100)
9706 		return -ETIMEDOUT;
9707 
9708 	return 0;
9709 }
9710 
9711 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9712 {
9713 	int r = 0;
9714 
9715 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9716 	if (r)
9717 		return r;
9718 
9719 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9720 	return r;
9721 }
9722 
9723 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9724 {
9725 	int r, i;
9726 	struct atom_clock_dividers dividers;
9727 	u32 tmp;
9728 
9729 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9730 					   ecclk, false, &dividers);
9731 	if (r)
9732 		return r;
9733 
9734 	for (i = 0; i < 100; i++) {
9735 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9736 			break;
9737 		mdelay(10);
9738 	}
9739 	if (i == 100)
9740 		return -ETIMEDOUT;
9741 
9742 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9743 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9744 	tmp |= dividers.post_divider;
9745 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9746 
9747 	for (i = 0; i < 100; i++) {
9748 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9749 			break;
9750 		mdelay(10);
9751 	}
9752 	if (i == 100)
9753 		return -ETIMEDOUT;
9754 
9755 	return 0;
9756 }
9757 
9758 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9759 {
9760 	struct pci_dev *root = rdev->pdev->bus->self;
9761 	int bridge_pos, gpu_pos;
9762 	u32 speed_cntl, mask, current_data_rate;
9763 	int ret, i;
9764 	u16 tmp16;
9765 
9766 	if (pci_is_root_bus(rdev->pdev->bus))
9767 		return;
9768 
9769 	if (radeon_pcie_gen2 == 0)
9770 		return;
9771 
9772 	if (rdev->flags & RADEON_IS_IGP)
9773 		return;
9774 
9775 	if (!(rdev->flags & RADEON_IS_PCIE))
9776 		return;
9777 
9778 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9779 	if (ret != 0)
9780 		return;
9781 
9782 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9783 		return;
9784 
9785 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9786 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9787 		LC_CURRENT_DATA_RATE_SHIFT;
9788 	if (mask & DRM_PCIE_SPEED_80) {
9789 		if (current_data_rate == 2) {
9790 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9791 			return;
9792 		}
9793 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9794 	} else if (mask & DRM_PCIE_SPEED_50) {
9795 		if (current_data_rate == 1) {
9796 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9797 			return;
9798 		}
9799 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9800 	}
9801 
9802 	bridge_pos = pci_pcie_cap(root);
9803 	if (!bridge_pos)
9804 		return;
9805 
9806 	gpu_pos = pci_pcie_cap(rdev->pdev);
9807 	if (!gpu_pos)
9808 		return;
9809 
9810 	if (mask & DRM_PCIE_SPEED_80) {
9811 		/* re-try equalization if gen3 is not already enabled */
9812 		if (current_data_rate != 2) {
9813 			u16 bridge_cfg, gpu_cfg;
9814 			u16 bridge_cfg2, gpu_cfg2;
9815 			u32 max_lw, current_lw, tmp;
9816 
9817 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9818 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9819 
9820 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9821 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9822 
9823 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9824 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9825 
9826 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9827 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9828 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9829 
9830 			if (current_lw < max_lw) {
9831 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9832 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9833 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9834 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9835 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9836 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9837 				}
9838 			}
9839 
9840 			for (i = 0; i < 10; i++) {
9841 				/* check status */
9842 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9843 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9844 					break;
9845 
9846 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9847 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9848 
9849 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9850 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9851 
9852 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9853 				tmp |= LC_SET_QUIESCE;
9854 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9855 
9856 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9857 				tmp |= LC_REDO_EQ;
9858 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9859 
9860 				mdelay(100);
9861 
9862 				/* linkctl */
9863 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9864 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9865 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9866 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9867 
9868 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9869 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9870 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9871 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9872 
9873 				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

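	/*
	 * Program the Target Link Speed field (LNKCTL2 bits 3:0):
	 * 1 = 2.5GT/s (gen1), 2 = 5.0GT/s (gen2), 3 = 8.0GT/s (gen3).
	 */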
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

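	/* poll until the hardware clears the initiate bit, i.e. the
	 * speed change has completed (or we hit the usec timeout) */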
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}

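/*
 * cik_program_aspm - tune PCIe ASPM (Active State Power Management)
 *
 * Sets up the L0s/L1 inactivity timers and PLL power-down in L1 and,
 * when the upstream bridge advertises CLKREQ# support, re-clocks
 * auxiliary blocks so the PCIe reference clock is no longer forced on.
 * Does nothing for IGPs, non-PCIE parts, or when ASPM is disabled with
 * radeon.aspm=0.
 */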
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

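			/*
			 * Allow the PHY PLLs to drop to their deepest power
			 * state (7) while the link is in the OFF and TXS2
			 * states, on both PIF blocks (PB0/PB1).
			 */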
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

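			/*
			 * CLKREQ# based gating is only usable when the
			 * upstream bridge advertises Clock Power Management
			 * (PCI_EXP_LNKCAP_CLKPM) in its link capabilities.
			 */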
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

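			/*
			 * With CLKREQ# available, allow PHY power-down in L1
			 * and L2/L3, switch the thermal monitor, deep-sleep
			 * and MPLL bypass clocks to alternate sources, and
			 * stop forcing the BIF reference clock on.
			 */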
			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

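	/*
	 * Back out of L0s (clear the inactivity timer) if the reported
	 * N_FTS value is saturated and the link is lane-reversed in both
	 * directions.
	 */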
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}